Merge remote-tracking branch 'origin/main' into dev

pull/1766/head
mzr1996 2023-08-15 11:37:22 +08:00
commit bf62497e02
28 changed files with 1708 additions and 6 deletions

View File

@ -2,7 +2,7 @@
model = dict(
type='MAE',
backbone=dict(
type='MIMHiViT', patch_size=16, arch='base', mask_ratio=0.75),
type='MAEHiViT', patch_size=16, arch='base', mask_ratio=0.75),
neck=dict(
type='MAEPretrainDecoder',
patch_size=16,

View File

@ -6,7 +6,7 @@ _base_ = [
# model settings
model = dict(
backbone=dict(type='MIMHiViT', arch='large'),
backbone=dict(type='MAEHiViT', arch='large'),
neck=dict(type='MAEPretrainDecoder', embed_dim=768))
# optimizer wrapper

View File

@ -107,6 +107,11 @@ SUN397
.. autoclass:: SUN397
RefCOCO
--------
.. autoclass:: RefCOCO
Dataset Wrappers
----------------

View File

@ -280,6 +280,14 @@ test_dataloader = val_dataloader
Some dataset homepage links may be unavailable; in that case you can download the datasets through [OpenDataLab](https://opendatalab.com/), for example [Stanford Cars](https://opendatalab.com/Stanford_Cars/download).
## Supported Multi-modality Datasets
| Datasets | split | HomePage |
| --------------------------------------------------------------------------------------------- | :----------------------- | ----------------------------------------------------------------------------------- |
| [`RefCOCO`](mmpretrain.datasets.RefCOCO)(data_root, ann_file, data_prefix, split_file[, split, ...]) | ["train", "val", "test"] | [RefCOCO](https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcoco.zip) Dataset. |
Some dataset homepage links may be unavailable; in that case you can download the datasets through [OpenDataLab](https://opendatalab.com/), for example [RefCOCO](https://opendatalab.com/RefCOCO/download).
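A dataset config entry for `RefCOCO` could look like the minimal sketch below. The directory and file names (`data/coco`, `train2014`, `instances.json`, `refs(unc).p`) are assumptions based on the downloaded archive and should be adapted to your local layout.

```python
# Minimal sketch of a RefCOCO dataset config; paths and file names are illustrative.
train_dataset = dict(
    type='RefCOCO',
    data_root='data/coco',               # root directory of the dataset
    data_prefix='train2014',             # folder holding the COCO train2014 images
    ann_file='refcoco/instances.json',   # COCO-style instance annotations
    split_file='refcoco/refs(unc).p',    # referring-expression split file
    split='train',                       # one of "train", "val", "test"
)
```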
## OpenMMLab 2.0 Standard Dataset
To facilitate the training of multi-task algorithm models, we unify the dataset interfaces of different tasks. OpenMMLab has formulated the **OpenMMLab 2.0 Dataset Format Specification**. When starting a training task, users can convert their dataset annotations into the specified format and use the OpenMMLab algorithm libraries to perform training and testing based on the annotation files.
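For illustration, the sketch below writes a toy classification annotation file in this format. The `metainfo`/`data_list` layout follows the specification, while the class names, image paths, and output file name are placeholders.

```python
# Sketch: write an annotation file in the OpenMMLab 2.0 dataset format
# (a JSON file with "metainfo" and "data_list"). All names are illustrative.
import json
import os

annotation = {
    'metainfo': {
        'classes': ['cat', 'dog'],  # label 0 -> cat, label 1 -> dog
    },
    'data_list': [
        {'img_path': 'train/cat_0001.jpg', 'gt_label': 0},
        {'img_path': 'train/dog_0001.jpg', 'gt_label': 1},
    ],
}

os.makedirs('meta', exist_ok=True)
with open('meta/train.json', 'w', encoding='utf-8') as f:
    json.dump(annotation, f, indent=2)
```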

View File

@ -0,0 +1,53 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.dataset import DefaultSampler, default_collate
from mmpretrain.datasets import (BEiTMaskGenerator, ColorJitter, ImageNet,
LoadImageFromFile, PackInputs, RandomFlip,
RandomResizedCropAndInterpolationWithTwoPic)
from mmpretrain.models import TwoNormDataPreprocessor
dataset_type = ImageNet
data_root = 'data/imagenet/'
data_preprocessor = dict(
type=TwoNormDataPreprocessor,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
second_mean=[127.5, 127.5, 127.5],
second_std=[127.5, 127.5, 127.5],
to_rgb=True)
train_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=ColorJitter, brightness=0.4, contrast=0.4, saturation=0.4,
hue=0.),
dict(type=RandomFlip, prob=0.5, direction='horizontal'),
dict(
type=RandomResizedCropAndInterpolationWithTwoPic,
size=224,
second_size=224,
interpolation='bicubic',
second_interpolation='bicubic',
scale=(0.2, 1.0)),
dict(
type=BEiTMaskGenerator,
input_size=(14, 14),
num_masking_patches=75,
max_num_patches=75,
min_num_patches=16),
dict(type=PackInputs)
]
train_dataloader = dict(
batch_size=256,
num_workers=8,
persistent_workers=True,
sampler=dict(type=DefaultSampler, shuffle=True),
collate_fn=dict(type=default_collate),
dataset=dict(
type=dataset_type,
data_root=data_root,
split='train',
pipeline=train_pipeline))

View File

@ -7,7 +7,7 @@ from mmpretrain.datasets import ImageNet, PackInputs, RandomResizedCrop
from mmpretrain.models import SelfSupDataPreprocessor
# dataset settings
dataset_type = 'ImageNet'
dataset_type = ImageNet
data_root = 'data/imagenet/'
data_preprocessor = dict(
type=SelfSupDataPreprocessor,
@ -34,8 +34,7 @@ train_dataloader = dict(
sampler=dict(type=DefaultSampler, shuffle=True),
collate_fn=dict(type='default_collate'),
dataset=dict(
type=ImageNet,
type=dataset_type,
data_root=data_root,
ann_file='meta/train.txt',
data_prefix=dict(img_path='train/'),
split='train',
pipeline=train_pipeline))

View File

@ -0,0 +1,28 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmpretrain.models import (MAE, MAEHiViT, MAEPretrainDecoder,
MAEPretrainHead, PixelReconstructionLoss)
# model settings
model = dict(
type=MAE,
backbone=dict(type=MAEHiViT, patch_size=16, arch='base', mask_ratio=0.75),
neck=dict(
type=MAEPretrainDecoder,
patch_size=16,
in_chans=3,
embed_dim=512,
decoder_embed_dim=512,
decoder_depth=6,
decoder_num_heads=16,
mlp_ratio=4.,
),
head=dict(
type=MAEPretrainHead,
norm_pix=True,
patch_size=16,
loss=dict(type=PixelReconstructionLoss, criterion='L2')),
init_cfg=[
dict(type='Xavier', layer='Linear', distribution='uniform'),
dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0)
])

View File

@ -0,0 +1,139 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from ..._base_.datasets.imagenet_bs64_swin_224 import *
from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
from ..._base_.default_runtime import *
from mmengine.hooks import CheckpointHook
from mmengine.model import PretrainedInit, TruncNormalInit
from mmengine.optim import CosineAnnealingLR, LinearLR
from torch.optim import AdamW
from mmpretrain.datasets import LoadImageFromFile, PackInputs, RandomFlip
from mmpretrain.engine.optimizers import \
LearningRateDecayOptimWrapperConstructor
from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
LinearClsHead)
from mmpretrain.models.utils.batch_augments import CutMix, Mixup
data_preprocessor = dict(
num_classes=1000,
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
to_rgb=True,
)
# model settings
model = dict(
type=ImageClassifier,
backbone=dict(
type=BEiTViT,
arch='base',
img_size=224,
patch_size=16,
drop_path_rate=0.1,
out_type='avg_featmap',
use_abs_pos_emb=False,
use_rel_pos_bias=True,
use_shared_rel_pos_bias=False,
init_cfg=dict(type=PretrainedInit, checkpoint='', prefix='backbone.')),
neck=None,
head=dict(
type=LinearClsHead,
num_classes=1000,
in_channels=768,
loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
init_cfg=[dict(type=TruncNormalInit, layer='Linear', std=0.02)]),
train_cfg=dict(
augments=[dict(type=Mixup, alpha=0.8),
dict(type=CutMix, alpha=1.0)]))
train_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=RandomResizedCrop,
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type=RandomFlip, prob=0.5, direction='horizontal'),
dict(
type=RandAugment,
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(pad_val=[104, 116, 124], interpolation='bicubic')),
dict(
type=RandomErasing,
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=0.3333333333333333,
fill_color=[103.53, 116.28, 123.675],
fill_std=[57.375, 57.12, 58.395]),
dict(type=PackInputs)
]
test_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=ResizeEdge,
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type=CenterCrop, crop_size=224),
dict(type=PackInputs)
]
train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(batch_size=128, dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# optimizer wrapper
optim_wrapper = dict(
optimizer=dict(type=AdamW, lr=4e-3, weight_decay=0.05, betas=(0.9, 0.999)),
constructor=LearningRateDecayOptimWrapperConstructor,
paramwise_cfg=dict(
_delete_=True,
layer_decay_rate=0.65,
custom_keys={
# the following configurations are designed for BEiT
'.ln': dict(decay_mult=0.0),
'.bias': dict(decay_mult=0.0),
'q_bias': dict(decay_mult=0.0),
'v_bias': dict(decay_mult=0.0),
'.cls_token': dict(decay_mult=0.0),
'.pos_embed': dict(decay_mult=0.0),
'.gamma': dict(decay_mult=0.0),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=1e-4,
by_epoch=True,
begin=0,
end=20,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
by_epoch=True,
begin=20,
end=100,
eta_min=1e-6,
convert_to_iter_based=True)
]
# runtime settings
default_hooks = dict(
# save checkpoint per epoch.
checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=2))
train_cfg = dict(by_epoch=True, max_epochs=100)
randomness = dict(seed=0)

View File

@ -0,0 +1,50 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from ..._base_.datasets.imagenet_bs64_swin_224 import *
from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
from ..._base_.default_runtime import *
from mmengine.model import ConstantInit, TruncNormalInit
from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
LinearClsHead)
from mmpretrain.models.utils.batch_augments import CutMix, Mixup
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
model = dict(
type=ImageClassifier,
backbone=dict(
type=BEiTViT,
arch='base',
img_size=224,
patch_size=16,
out_type='avg_featmap',
use_abs_pos_emb=False,
use_rel_pos_bias=True,
use_shared_rel_pos_bias=False,
),
neck=None,
head=dict(
type=LinearClsHead,
num_classes=1000,
in_channels=768,
loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
),
init_cfg=[
dict(type=TruncNormalInit, layer='Linear', std=.02),
dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
],
train_cfg=dict(
augments=[dict(type=Mixup, alpha=0.8),
dict(type=CutMix, alpha=1.0)]))

View File

@ -0,0 +1,130 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.datasets.imagenet_bs256_beitv2 import *
from .._base_.default_runtime import *
from mmengine.model import ConstantInit, PretrainedInit, TruncNormalInit
from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
from mmengine.runner import EpochBasedTrainLoop
from torch.optim import AdamW
from mmpretrain.models import (VQKD, BEiT, BEiTPretrainViT, BEiTV2Head,
BEiTV2Neck, CrossEntropyLoss)
vqkd_encoder = dict(
arch='base',
img_size=224,
patch_size=16,
in_channels=3,
out_indices=-1,
drop_rate=0.,
drop_path_rate=0.,
norm_cfg=dict(type='LN', eps=1e-6),
final_norm=True,
out_type='featmap',
with_cls_token=True,
frozen_stages=-1,
use_abs_pos_emb=True,
use_rel_pos_bias=False,
use_shared_rel_pos_bias=False,
layer_scale_init_value=0.,
interpolate_mode='bicubic',
patch_cfg=dict(),
layer_cfgs=dict(),
init_cfg=None)
layer_scale_init_value = 0.1
drop_path_rate = 0.1 # 0. for 300 epochs and 0.1 for 1600 epochs.
model = dict(
type=BEiT,
backbone=dict(
type=BEiTPretrainViT,
arch='base',
patch_size=16,
out_indices=[-4, -1],
drop_path_rate=drop_path_rate,
final_norm=False,
out_type='raw',
layer_scale_init_value=layer_scale_init_value,
init_cfg=[
dict(type=TruncNormalInit, std=0.02, layer='Linear'),
dict(type=TruncNormalInit, std=0.02, layer='Conv2d'),
dict(type=ConstantInit, layer='LayerNorm', val=1.0, bias=0.0)
]),
neck=dict(
type=BEiTV2Neck,
num_layers=2,
early_layers=9,
backbone_arch='base',
drop_path_rate=drop_path_rate,
layer_scale_init_value=layer_scale_init_value,
),
head=dict(
type=BEiTV2Head,
embed_dims=768,
num_embed=8192,
loss=dict(type=CrossEntropyLoss)),
target_generator=dict(
type=VQKD,
encoder_config=vqkd_encoder,
init_cfg=dict(
type=PretrainedInit,
checkpoint= # noqa
'https://download.openmmlab.com/mmselfsup/1.x/target_generator_ckpt/vqkd_encoder.pth' # noqa
)))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
# betas: (0.9, 0.98) for 300 epochs and (0.9, 0.999) for 1600 epochs.
optimizer=dict(
type=AdamW, lr=1.5e-3, betas=(0.9, 0.999), weight_decay=0.05),
clip_grad=dict(max_norm=3.0),
paramwise_cfg=dict(
custom_keys={
# the following configurations are designed for BEiT
'.ln': dict(decay_mult=0.0),
'.bias': dict(decay_mult=0.0),
'q_bias': dict(decay_mult=0.0),
'v_bias': dict(decay_mult=0.0),
'.cls_token': dict(decay_mult=0.0),
'.pos_embed': dict(decay_mult=0.0),
'.gamma': dict(decay_mult=0.0),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=1e-4,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
eta_min=1e-5,
by_epoch=True,
begin=10,
end=1600,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
default_hooks = dict(
# only keeps the latest 3 checkpoints
checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=3))
randomness = dict(seed=0, diff_rank_seed=True)
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)

View File

@ -0,0 +1,130 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.datasets.imagenet_bs256_beitv2 import *
from .._base_.default_runtime import *
from mmengine.model import ConstantInit, PretrainedInit, TruncNormalInit
from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
from mmengine.runner import EpochBasedTrainLoop
from torch.optim import AdamW
from mmpretrain.models import (VQKD, BEiT, BEiTPretrainViT, BEiTV2Head,
BEiTV2Neck, CrossEntropyLoss)
# model settings
vqkd_encoder = dict(
arch='base',
img_size=224,
patch_size=16,
in_channels=3,
out_indices=-1,
drop_rate=0.,
drop_path_rate=0.,
norm_cfg=dict(type='LN', eps=1e-6),
final_norm=True,
out_type='featmap',
with_cls_token=True,
frozen_stages=-1,
use_abs_pos_emb=True,
use_rel_pos_bias=False,
use_shared_rel_pos_bias=False,
layer_scale_init_value=0.,
interpolate_mode='bicubic',
patch_cfg=dict(),
layer_cfgs=dict(),
init_cfg=None)
layer_scale_init_value = 0.1
drop_path_rate = 0. # 0. for 300 epochs and 0.1 for 1600 epochs.
model = dict(
type=BEiT,
backbone=dict(
type=BEiTPretrainViT,
arch='base',
patch_size=16,
out_indices=[-4, -1],
drop_path_rate=drop_path_rate,
final_norm=False,
out_type='raw',
layer_scale_init_value=layer_scale_init_value,
init_cfg=[
dict(type=TruncNormalInit, std=0.02, layer='Linear'),
dict(type=TruncNormalInit, std=0.02, layer='Conv2d'),
dict(type=ConstantInit, layer='LayerNorm', val=1.0, bias=0.0)
]),
neck=dict(
type=BEiTV2Neck,
num_layers=2,
early_layers=9,
backbone_arch='base',
drop_path_rate=drop_path_rate,
layer_scale_init_value=layer_scale_init_value,
),
head=dict(
type=BEiTV2Head,
embed_dims=768,
num_embed=8192,
loss=dict(type=CrossEntropyLoss)),
target_generator=dict(
type=VQKD,
encoder_config=vqkd_encoder,
init_cfg=dict(
type=PretrainedInit,
checkpoint= # noqa
'https://download.openmmlab.com/mmselfsup/1.x/target_generator_ckpt/vqkd_encoder.pth' # noqa
)))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
# betas: (0.9, 0.98) for 300 epochs and (0.9, 0.999) for 1600 epochs.
optimizer=dict(
type=AdamW, lr=1.5e-3, betas=(0.9, 0.98), weight_decay=0.05),
clip_grad=dict(max_norm=3.0),
paramwise_cfg=dict(
custom_keys={
# the following configurations are designed for BEiT
'.ln': dict(decay_mult=0.0),
'.bias': dict(decay_mult=0.0),
'q_bias': dict(decay_mult=0.0),
'v_bias': dict(decay_mult=0.0),
'.cls_token': dict(decay_mult=0.0),
'.pos_embed': dict(decay_mult=0.0),
'.gamma': dict(decay_mult=0.0),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=1e-4,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
eta_min=1e-5,
by_epoch=True,
begin=10,
end=300,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=300)
default_hooks = dict(
# only keeps the latest 3 checkpoints
checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=3))
randomness = dict(seed=0, diff_rank_seed=True)
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)

View File

@ -0,0 +1,132 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from ..._base_.datasets.imagenet_bs64_swin_224 import *
from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
from ..._base_.default_runtime import *
from mmengine.model import PretrainedInit, TruncNormalInit
from mmengine.optim import CosineAnnealingLR, LinearLR
from torch.optim import AdamW
from mmpretrain.engine.optimizers import \
LearningRateDecayOptimWrapperConstructor
from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
LinearClsHead)
from mmpretrain.models.utils.batch_augments import CutMix, Mixup
# model settings
model = dict(
type=ImageClassifier,
backbone=dict(
type=BEiTViT,
arch='base',
img_size=224,
patch_size=16,
# 0.2 for 1600 epochs pretrained models and 0.1 for 300 epochs.
drop_path_rate=0.1,
out_type='avg_featmap',
use_abs_pos_emb=False,
use_rel_pos_bias=True,
use_shared_rel_pos_bias=False,
init_cfg=dict(type=PretrainedInit, checkpoint='', prefix='backbone.')),
neck=None,
head=dict(
type=LinearClsHead,
num_classes=1000,
in_channels=768,
loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
init_cfg=[dict(type=TruncNormalInit, layer='Linear', std=0.02)]),
train_cfg=dict(
augments=[dict(type=Mixup, alpha=0.8),
dict(type=CutMix, alpha=1.0)]))
train_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=RandomResizedCrop,
scale=224,
backend='pillow',
interpolation='bicubic'),
dict(type=RandomFlip, prob=0.5, direction='horizontal'),
dict(
type=RandAugment,
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(pad_val=[104, 116, 124], interpolation='bicubic')),
dict(
type=RandomErasing,
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=0.3333333333333333,
fill_color=[103.53, 116.28, 123.675],
fill_std=[57.375, 57.12, 58.395]),
dict(type=PackInputs)
]
test_pipeline = [
dict(type=LoadImageFromFile),
dict(
type=ResizeEdge,
scale=256,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type=CenterCrop, crop_size=224),
dict(type=PackInputs)
]
train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(batch_size=128, dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# optimizer wrapper
optim_wrapper = dict(
optimizer=dict(type=AdamW, lr=5e-4, weight_decay=0.05, betas=(0.9, 0.999)),
constructor=LearningRateDecayOptimWrapperConstructor,
paramwise_cfg=dict(
_delete_=True,
# 0.6 for 1600 epochs pretrained models and 0.65 for 300 epochs
layer_decay_rate=0.65,
custom_keys={
# the following configurations are designed for BEiT
'.ln': dict(decay_mult=0.0),
'.bias': dict(decay_mult=0.0),
'q_bias': dict(decay_mult=0.0),
'v_bias': dict(decay_mult=0.0),
'.cls_token': dict(decay_mult=0.0),
'.pos_embed': dict(decay_mult=0.0),
'.gamma': dict(decay_mult=0.0),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=1e-4,
by_epoch=True,
begin=0,
end=20,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
by_epoch=True,
begin=20,
end=100,
eta_min=1e-6,
convert_to_iter_based=True)
]
# runtime settings
default_hooks = dict(
# save checkpoint per epoch.
checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=2))
train_cfg = dict(by_epoch=True, max_epochs=100)
randomness = dict(seed=0)

View File

@ -0,0 +1,42 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from ..._base_.datasets.imagenet_bs64_swin_224 import *
from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
from ..._base_.default_runtime import *
from mmengine.model import ConstantInit, TruncNormalInit
from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
LinearClsHead)
from mmpretrain.models.utils.batch_augments.cutmix import CutMix
from mmpretrain.models.utils.batch_augments.mixup import Mixup
model = dict(
type=ImageClassifier,
backbone=dict(
type=BEiTViT,
arch='base',
img_size=224,
patch_size=16,
out_type='avg_featmap',
use_abs_pos_emb=False,
use_rel_pos_bias=True,
use_shared_rel_pos_bias=False,
),
neck=None,
head=dict(
type=LinearClsHead,
num_classes=1000,
in_channels=768,
loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
),
init_cfg=[
dict(type=TruncNormalInit, layer='Linear', std=.02),
dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
],
train_cfg=dict(
augments=[dict(type=Mixup, alpha=0.8),
dict(type=CutMix, alpha=1.0)]))

View File

@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_hivit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'norm': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=1560,
by_epoch=True,
begin=40,
end=1600,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_hivit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'norm': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=360,
by_epoch=True,
begin=40,
end=400,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_hivit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'norm': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=760,
by_epoch=True,
begin=40,
end=800,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_hivit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEHiViT, arch='large'),
neck=dict(type=MAEPretrainDecoder, embed_dim=768))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'norm': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=1560,
by_epoch=True,
begin=40,
end=1600,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_hivit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEHiViT, arch='large'),
neck=dict(type=MAEPretrainDecoder, embed_dim=768))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'norm': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=360,
by_epoch=True,
begin=40,
end=400,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_hivit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEHiViT, arch='large'),
neck=dict(type=MAEPretrainDecoder, embed_dim=768))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'norm': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=760,
by_epoch=True,
begin=40,
end=800,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
find_unused_parameters = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=1560,
by_epoch=True,
begin=40,
end=1600,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=360,
by_epoch=True,
begin=40,
end=400,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=760,
by_epoch=True,
begin=40,
end=800,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,75 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEViT, arch='h', patch_size=14),
neck=dict(
type=MAEPretrainDecoder,
embed_dim=1280,
patch_size=14,
num_patches=256),
head=dict(patch_size=14))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=1560,
by_epoch=True,
begin=40,
end=1600,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEViT, arch='l'),
neck=dict(type=MAEPretrainDecoder, embed_dim=1024))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=1560,
by_epoch=True,
begin=40,
end=1600,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEViT, arch='l'),
neck=dict(type=MAEPretrainDecoder, embed_dim=1024))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=260,
by_epoch=True,
begin=40,
end=300,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=300)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEViT, arch='l'),
neck=dict(type=MAEPretrainDecoder, embed_dim=1024))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=360,
by_epoch=True,
begin=40,
end=400,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,70 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change in the future.
from mmengine.config import read_base
with read_base():
from .._base_.models.mae_vit_base_p16 import *
from .._base_.datasets.imagenet_bs512_mae import *
from .._base_.default_runtime import *
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
from mmengine.runner.loops import EpochBasedTrainLoop
from torch.optim.adamw import AdamW
# model settings
model.update(
backbone=dict(type=MAEViT, arch='l'),
neck=dict(type=MAEPretrainDecoder, embed_dim=1024))
# optimizer wrapper
optim_wrapper = dict(
type=AmpOptimWrapper,
loss_scale='dynamic',
optimizer=dict(
type=AdamW,
lr=1.5e-4 * 4096 / 256,
betas=(0.9, 0.95),
weight_decay=0.05),
paramwise_cfg=dict(
custom_keys={
'ln': dict(decay_mult=0.0),
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.),
'mask_token': dict(decay_mult=0.),
'cls_token': dict(decay_mult=0.)
}))
# learning rate scheduler
param_scheduler = [
dict(
type=LinearLR,
start_factor=0.0001,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
T_max=760,
by_epoch=True,
begin=40,
end=800,
convert_to_iter_based=True)
]
# runtime settings
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=800)
# only keeps the latest 3 checkpoints
default_hooks.checkpoint = dict(
type=CheckpointHook, interval=1, max_keep_ckpts=3)
randomness.update(seed=0, diff_rank_seed=True)
# auto resume
resume = True
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -14,6 +14,37 @@ from mmpretrain.registry import DATASETS
class RefCOCO(BaseDataset):
"""RefCOCO dataset.
RefCOCO is a popular dataset used for the task of visual grounding.
The dataset can be obtained from the official source:
https://github.com/lichengunc/refer
The RefCOCO dataset is organized in a structured format: ::
FeaturesDict({
'coco_annotations': Sequence({
'area': int64,
'bbox': BBoxFeature(shape=(4,), dtype=float32),
'id': int64,
'label': int64,
}),
'image': Image(shape=(None, None, 3), dtype=uint8),
'image/id': int64,
'objects': Sequence({
'area': int64,
'bbox': BBoxFeature(shape=(4,), dtype=float32),
'gt_box_index': int64,
'id': int64,
'label': int64,
'refexp': Sequence({
'raw': Text(shape=(), dtype=string),
'refexp_id': int64,
}),
}),
})
Args:
ann_file (str): Annotation file path.
data_root (str): The root directory for ``data_prefix`` and