diff --git a/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py b/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py
index 081a041d..465ff5c3 100644
--- a/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py
+++ b/configs/maskfeat/maskfeat_vit-base-p16_8xb256-amp-coslr-300e_in1k.py
@@ -43,15 +43,12 @@ train_dataloader = dict(
 # model settings
 model = dict(
     type='MaskFeat',
-    data_preprocessor=dict(
-        mean=[123.675, 116.28, 103.53],
-        std=[58.395, 57.12, 57.375],
-        to_rgb=True),
     backbone=dict(type='MaskFeatViT', arch='b', patch_size=16),
     neck=dict(
         type='LinearNeck',
         in_channels=768,
         out_channels=108,
+        norm_cfg=None,
         init_cfg=dict(type='TruncNormal', layer='Linear', std=0.02, bias=0)),
     head=dict(
         type='MIMHead',
@@ -67,13 +64,13 @@ optim_wrapper = dict(
         type='AdamW', lr=2e-4 * 8, betas=(0.9, 0.999), weight_decay=0.05),
     clip_grad=dict(max_norm=0.02),
     paramwise_cfg=dict(
-        norm_decay_mult=0.0, bias_decay_mult=0.0,
-        # commented 'pos_embed' and 'cls_token' to avoid loss stuck situation
+        norm_decay_mult=0.0,
+        flat_decay_mult=0.0,
         custom_keys={
             # 'pos_embed': dict(decay_mult=0.),
-            'mask_token': dict(decay_mult=0.),
-            # 'cls_token': dict(decay_mult=0.)
+            # 'cls_token': dict(decay_mult=0.),
+            'mask_token': dict(decay_mult=0.)
         }))
 
 # learning rate scheduler
@@ -88,6 +85,7 @@ param_scheduler = [
     dict(
         type='CosineAnnealingLR',
         T_max=270,
+        eta_min=1e-6,
         by_epoch=True,
         begin=30,
         end=300,
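For anyone applying the patch by hand, the three touched blocks should read roughly as follows afterwards. This is a sketch assembled only from the hunks above: keys the diff does not show (the rest of `head`, the `optim_wrapper` type, the warmup entry of `param_scheduler`) are elided with `# ...` and assumed unchanged. The note on `flat_decay_mult` reflects MMEngine's `DefaultOptimWrapperConstructor`, which applies that multiplier to every one-dimensional parameter.

```python
# model settings: the inline data_preprocessor override is gone and the
# neck explicitly disables its norm layer via norm_cfg=None.
model = dict(
    type='MaskFeat',
    backbone=dict(type='MaskFeatViT', arch='b', patch_size=16),
    neck=dict(
        type='LinearNeck',
        in_channels=768,
        out_channels=108,
        norm_cfg=None,
        init_cfg=dict(type='TruncNormal', layer='Linear', std=0.02, bias=0)),
    head=dict(
        type='MIMHead',
        # ... remaining head keys unchanged ...
    ))

# optimizer: bias_decay_mult=0.0 is replaced by flat_decay_mult=0.0, which
# zeroes weight decay for all 1-D parameters (biases, norm weights), not
# just biases; mask_token keeps its explicit decay_mult=0. override.
optim_wrapper = dict(
    # ... optimizer wrapper type etc. unchanged ...
    optimizer=dict(
        type='AdamW', lr=2e-4 * 8, betas=(0.9, 0.999), weight_decay=0.05),
    clip_grad=dict(max_norm=0.02),
    paramwise_cfg=dict(
        norm_decay_mult=0.0,
        flat_decay_mult=0.0,
        custom_keys={
            # 'pos_embed': dict(decay_mult=0.),
            # 'cls_token': dict(decay_mult=0.),
            'mask_token': dict(decay_mult=0.)
        }))

# learning rate scheduler: the cosine stage now anneals toward an explicit
# floor instead of decaying all the way to zero.
param_scheduler = [
    # ... warmup stage unchanged ...
    dict(
        type='CosineAnnealingLR',
        T_max=270,
        eta_min=1e-6,
        by_epoch=True,
        begin=30,
        end=300,
        # ... remaining keys unchanged ...
    ),
]
```

The `eta_min=1e-6` floor keeps the learning rate from reaching exactly zero in the final epochs of the cosine schedule.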