[CodeCamp2023-336] New Version of `config` Adapting MAE Algorithm (#1750)
* fix typo MIMHIVIT to MAEHiViT
* fix typo MIMHiViT to MAEHiViT
* [CodeCamp2023-336] New version of config adapting MAE algorithm
* pre-commit check
* Revert soft-link modification

Co-authored-by: mzr1996 <mzr1996@163.com>
parent bff80d3c48
commit 6474d6befa
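Before the diff itself, a brief hedged sketch of how these new-format (pure-Python, `read_base`) configs are consumed; the config path below is hypothetical and should be adjusted to wherever the file lives under `mmpretrain/configs/mae/`.

# Assumed usage sketch; the config filename is hypothetical.
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'mmpretrain/configs/mae/mae_vit_base_p16_8xb512_amp_coslr_400e_in1k.py')
cfg.work_dir = 'work_dirs/mae_selfsup'   # Runner.from_cfg requires a work_dir
runner = Runner.from_cfg(cfg)            # builds model, dataloaders, optimizer, hooks
runner.train()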
@@ -7,7 +7,7 @@ from mmpretrain.datasets import ImageNet, PackInputs, RandomResizedCrop
 from mmpretrain.models import SelfSupDataPreprocessor

 # dataset settings
-dataset_type = 'ImageNet'
+dataset_type = ImageNet
 data_root = 'data/imagenet/'
 data_preprocessor = dict(
     type=SelfSupDataPreprocessor,
@@ -34,8 +34,7 @@ train_dataloader = dict(
     sampler=dict(type=DefaultSampler, shuffle=True),
     collate_fn=dict(type='default_collate'),
     dataset=dict(
-        type=ImageNet,
+        type=dataset_type,
         data_root=data_root,
-        ann_file='meta/train.txt',
-        data_prefix=dict(img_path='train/'),
+        split='train',
         pipeline=train_pipeline))
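The hunks above replace string type names with the imported classes. A small self-contained toy (not mmpretrain code) showing why both spellings build the same object: mmengine registries accept either a registered name or the class itself as `type`.

from mmengine.registry import Registry

TOYS = Registry('toy')


@TOYS.register_module()
class Toy:
    def __init__(self, size=1):
        self.size = size


a = TOYS.build(dict(type='Toy', size=2))  # old style: registered string name
b = TOYS.build(dict(type=Toy, size=2))    # new style: the class object itself
assert type(a) is type(b) and a.size == b.size == 2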
|
@@ -0,0 +1,28 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmpretrain.models import (MAE, MAEHiViT, MAEPretrainDecoder,
                               MAEPretrainHead, PixelReconstructionLoss)

# model settings
model = dict(
    type=MAE,
    backbone=dict(type=MAEHiViT, patch_size=16, arch='base', mask_ratio=0.75),
    neck=dict(
        type=MAEPretrainDecoder,
        patch_size=16,
        in_chans=3,
        embed_dim=512,
        decoder_embed_dim=512,
        decoder_depth=6,
        decoder_num_heads=16,
        mlp_ratio=4.,
    ),
    head=dict(
        type=MAEPretrainHead,
        norm_pix=True,
        patch_size=16,
        loss=dict(type=PixelReconstructionLoss, criterion='L2')),
    init_cfg=[
        dict(type='Xavier', layer='Linear', distribution='uniform'),
        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0)
    ])
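For orientation, a rough toy of what `norm_pix=True` combined with an L2 `PixelReconstructionLoss` means conceptually (a simplified sketch, not mmpretrain's implementation): target patches are normalized by their own statistics and the squared error is averaged over masked patches only.

import torch


def toy_mae_loss(pred, target, mask, norm_pix=True, eps=1e-6):
    # pred/target: (B, num_patches, patch_dim); mask: (B, num_patches), 1 = masked
    if norm_pix:
        mean = target.mean(dim=-1, keepdim=True)
        var = target.var(dim=-1, keepdim=True)
        target = (target - mean) / (var + eps).sqrt()
    per_patch = ((pred - target) ** 2).mean(dim=-1)   # L2 error per patch
    return (per_patch * mask).sum() / mask.sum()      # only masked patches count


mask = torch.zeros(2, 196)
mask[:, :147] = 1                                     # ~75% of patches masked
loss = toy_mae_loss(torch.randn(2, 196, 768), torch.randn(2, 196, 768), mask)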
@@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_hivit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'norm': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=1560,
        by_epoch=True,
        begin=40,
        end=1600,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True
find_unused_parameters = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
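A quick arithmetic check on the learning-rate line and `auto_scale_lr` above, sketching the linear scaling rule they encode (assumed behaviour, not mmengine's code):

base_lr = 1.5e-4 * 4096 / 256      # the value written in the config: 2.4e-3
actual_batch_size = 8 * 512        # the '8xb512' part of the setting name
scaled_lr = base_lr * actual_batch_size / 4096
print(base_lr, scaled_lr)          # 0.0024 0.0024 -- unchanged, sizes match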
@@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_hivit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'norm': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=360,
        by_epoch=True,
        begin=40,
        end=400,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True
find_unused_parameters = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
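The schedule above warms up linearly for 40 epochs and then decays with cosine annealing until epoch 400. An epoch-level approximation of the resulting learning rate (a hedged sketch that ignores the per-iteration conversion from `convert_to_iter_based=True`):

import math


def lr_at(epoch, base_lr=1.5e-4 * 4096 / 256, warmup=40, max_epochs=400,
          start_factor=0.0001):
    if epoch < warmup:
        t = epoch / warmup                      # linear warmup up to base_lr
        return base_lr * (start_factor + (1 - start_factor) * t)
    t = (epoch - warmup) / (max_epochs - warmup)
    return 0.5 * base_lr * (1 + math.cos(math.pi * t))   # cosine decay to ~0


for e in (0, 40, 220, 399):
    print(e, round(lr_at(e), 6))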
@@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_hivit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'norm': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=760,
        by_epoch=True,
        begin=40,
        end=800,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True
find_unused_parameters = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_hivit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEHiViT, arch='large'),
    neck=dict(type=MAEPretrainDecoder, embed_dim=768))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'norm': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=1560,
        by_epoch=True,
        begin=40,
        end=1600,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True
find_unused_parameters = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
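Note that the large variants extend the inherited base model with `model.update(...)` rather than redefining `model`. With plain dict semantics (a simplified view of what the config system does), top-level keys such as `type` and `head` are preserved while the named keys are replaced:

base_model = dict(type='MAE', backbone=dict(arch='base'), head=dict(patch_size=16))
base_model.update(backbone=dict(arch='large'))
print(base_model['type'], base_model['backbone'])  # MAE {'arch': 'large'}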
@@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_hivit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEHiViT, arch='large'),
    neck=dict(type=MAEPretrainDecoder, embed_dim=768))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'norm': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=360,
        by_epoch=True,
        begin=40,
        end=400,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True
find_unused_parameters = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_hivit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEHiViT, arch='large'),
    neck=dict(type=MAEPretrainDecoder, embed_dim=768))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'norm': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=760,
        by_epoch=True,
        begin=40,
        end=800,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True
find_unused_parameters = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=1560,
        by_epoch=True,
        begin=40,
        end=1600,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
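The `custom_keys` above effectively exclude normalization weights, biases, positional embeddings and the mask/cls tokens from weight decay. A simplified sketch of the idea in plain PyTorch (mmengine's real key matching and decay_mult handling are richer):

import torch.nn as nn

NO_DECAY_KEYS = ('ln', 'bias', 'pos_embed', 'mask_token', 'cls_token')


def split_param_groups(model: nn.Module, weight_decay=0.05):
    decay, no_decay = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        (no_decay if any(k in name for k in NO_DECAY_KEYS) else decay).append(param)
    return [dict(params=decay, weight_decay=weight_decay),
            dict(params=no_decay, weight_decay=0.0)]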
@@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=360,
        by_epoch=True,
        begin=40,
        end=400,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=760,
        by_epoch=True,
        begin=40,
        end=800,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,75 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEViT, arch='h', patch_size=14),
    neck=dict(
        type=MAEPretrainDecoder,
        embed_dim=1280,
        patch_size=14,
        num_patches=256),
    head=dict(patch_size=14))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=1560,
        by_epoch=True,
        begin=40,
        end=1600,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
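All of these configs wrap the optimizer in `AmpOptimWrapper` with `loss_scale='dynamic'`. In plain PyTorch this corresponds roughly to autocast plus a dynamic `GradScaler` (a hedged sketch, not mmengine's wrapper):

import torch

scaler = torch.cuda.amp.GradScaler()        # dynamic loss scaling


def amp_step(model, batch, optimizer):
    optimizer.zero_grad()
    with torch.autocast('cuda', dtype=torch.float16):
        loss = model(batch)                 # assumes the model returns a scalar loss
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()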
@@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEViT, arch='l'),
    neck=dict(type=MAEPretrainDecoder, embed_dim=1024))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=1560,
        by_epoch=True,
        begin=40,
        end=1600,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEViT, arch='l'),
    neck=dict(type=MAEPretrainDecoder, embed_dim=1024))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=260,
        by_epoch=True,
        begin=40,
        end=300,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=300)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEViT, arch='l'),
    neck=dict(type=MAEPretrainDecoder, embed_dim=1024))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=360,
        by_epoch=True,
        begin=40,
        end=400,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)
@@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.config import read_base

with read_base():
    from .._base_.models.mae_vit_base_p16 import *
    from .._base_.datasets.imagenet_bs512_mae import *
    from .._base_.default_runtime import *

from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW

# model settings
model.update(
    backbone=dict(type=MAEViT, arch='l'),
    neck=dict(type=MAEPretrainDecoder, embed_dim=1024))

# optimizer wrapper
optim_wrapper = dict(
    type=AmpOptimWrapper,
    loss_scale='dynamic',
    optimizer=dict(
        type=AdamW,
        lr=1.5e-4 * 4096 / 256,
        betas=(0.9, 0.95),
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys={
            'ln': dict(decay_mult=0.0),
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.),
            'cls_token': dict(decay_mult=0.)
        }))

# learning rate scheduler
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=40,
        convert_to_iter_based=True),
    dict(
        type=CosineAnnealingLR,
        T_max=760,
        by_epoch=True,
        begin=40,
        end=800,
        convert_to_iter_based=True)
]

# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
    type=CheckpointHook, interval=1, max_keep_ckpts=3)

randomness.update(seed=0, diff_rank_seed=True)

# auto resume
resume = True

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)