From c84a58b7b519b587ffb1cdd7427dab2084d3b0ac Mon Sep 17 00:00:00 2001
From: "limengzhang.vendor"
Date: Wed, 8 Jun 2022 06:28:35 +0000
Subject: [PATCH] [Refactor] Refactor default_hooks and train & val & test loops in configs
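
Migrate configs from the MMCV runner fields to the MMEngine-style training
loops and hooks. The mapping below is a summary of the diffs in this patch
(shown for the 160k schedule), not any additional API:

    # before
    optimizer_config = dict()
    runner = dict(type='IterBasedRunner', max_iters=160000)
    checkpoint_config = dict(by_epoch=False, interval=16000)

    # after
    optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
    train_cfg = dict(
        type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
    val_cfg = dict(type='ValLoop')
    test_cfg = dict(type='TestLoop')
    default_hooks = dict(
        optimizer=dict(type='OptimizerHook', grad_clip=None),
        timer=dict(type='IterTimerHook'),
        logger=dict(type='LoggerHook', interval=50),
        param_scheduler=dict(type='ParamSchedulerHook'),
        checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000),
        sampler_seed=dict(type='DistSamplerSeedHook'))

Per-config optimizer overrides now spell out the full optimizer dict and wrap
it in optim_wrapper; fp16 and grad-clip settings move from optimizer_config
into the optimizer entry of default_hooks. lr_config and evaluation are left
as-is in this patch.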
---
 configs/_base_/default_runtime.py             | 19 +++---
 configs/_base_/schedules/schedule_160k.py     | 18 ++++--
 configs/_base_/schedules/schedule_20k.py      | 17 +++--
 configs/_base_/schedules/schedule_320k.py     | 18 ++++--
 configs/_base_/schedules/schedule_40k.py      | 17 +++--
 configs/_base_/schedules/schedule_80k.py      | 17 +++--
 ...ernet_beit-base_8x2_640x640_160k_ade20k.py |  6 +-
 ...beit-large_fp16_8x1_640x640_160k_ade20k.py |  7 ++-
 ..._lr5e-3_4x4_512x512_160k_coco-stuff164k.py |  6 +-
 ...1_r18-d32_4x4_1024x1024_160k_cityscapes.py |  3 +-
 ..._in1k-pre_4x4_1024x1024_160k_cityscapes.py |  3 +-
 ..._lr5e-3_4x4_512x512_160k_coco-stuff164k.py |  3 +-
 ...1_r50-d32_4x4_1024x1024_160k_cityscapes.py |  3 +-
 ..._lr5e-3_4x4_512x512_160k_coco-stuff164k.py |  3 +-
 ...netv2_fcn_4x4_1024x1024_160k_cityscapes.py |  3 +-
 ...netv2_fcn_4x8_1024x1024_160k_cityscapes.py |  3 +-
 ..._fcn_fp16_4x4_1024x1024_160k_cityscapes.py |  2 +-
 ..._fcn_ohem_4x4_1024x1024_160k_cityscapes.py | 63 ++++++++++++++++++-
 .../cgnet/cgnet_512x1024_60k_cityscapes.py    |  8 ++-
 configs/cgnet/cgnet_680x680_60k_cityscapes.py |  8 ++-
 ..._convnext_base_fp16_512x512_160k_ade20k.py | 13 ++--
 ..._convnext_base_fp16_640x640_160k_ade20k.py | 13 ++--
 ...convnext_large_fp16_640x640_160k_ade20k.py | 13 ++--
 ...convnext_small_fp16_512x512_160k_ade20k.py | 13 ++--
 ..._convnext_tiny_fp16_512x512_160k_ade20k.py | 13 ++--
 ...onvnext_xlarge_fp16_640x640_160k_ade20k.py | 13 ++--
 ...v3_r101-d8_fp16_512x1024_80k_cityscapes.py |  2 +-
 ...us_r101-d8_fp16_512x1024_80k_cityscapes.py |  2 +-
 .../dpt/dpt_vit-b16_512x512_160k_ade20k.py    |  6 +-
 ...cn_r101-d8_fp16_512x1024_80k_cityscapes.py |  2 +-
 ...bv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py |  7 ++-
 ...fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py |  7 ++-
 ...net_r50-d8_8x2_512x512_adamw_80k_ade20k.py |  7 ++-
 ...net_r50-d8_8x2_512x512_adamw_80k_ade20k.py |  7 ++-
 ...net_swin-t_8x2_512x512_adamw_80k_ade20k.py | 13 +++-
 ...t_mae-base_fp16_8x2_512x512_160k_ade20k.py | 10 ++-
 ...net_r101-d8_512x1024_40k_b16_cityscapes.py |  3 +-
 ...net_r101-d8_512x1024_80k_b16_cityscapes.py |  3 +-
 ...et_r101-d8_fp16_512x1024_80k_cityscapes.py |  2 +-
 ...-pretrain_512x1024_adamw_80k_cityscapes.py |  7 ++-
 ...-pretrain_512x1024_adamw_80k_cityscapes.py |  7 ++-
 .../segformer_mit-b0_512x512_160k_ade20k.py   |  6 +-
 ...er_mit-b0_8x1_1024x1024_160k_cityscapes.py |  7 ++-
 ...512x512_160k_ade20k_pretrain_224x224_1K.py |  6 +-
 ...cpvt-s_uperhead_8x4_512x512_160k_ade20k.py |  6 +-
 ..._svt-s_uperhead_8x2_512x512_160k_ade20k.py |  6 +-
 ...rnet_vit-b16_ln_mln_512x512_160k_ade20k.py |  6 +-
 ...upernet_vit-b16_mln_512x512_160k_ade20k.py |  6 +-
 .../upernet_vit-b16_mln_512x512_80k_ade20k.py |  6 +-
 49 files changed, 338 insertions(+), 101 deletions(-)

diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py
index b564cc4e7..5925c6926 100644
--- a/configs/_base_/default_runtime.py
+++ b/configs/_base_/default_runtime.py
@@ -1,14 +1,9 @@
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook', by_epoch=False),
-        # dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-dist_params = dict(backend='nccl')
+default_scope = 'mmseg'
+env_cfg = dict(
+    cudnn_benchmark=True,
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    dist_cfg=dict(backend='nccl'),
+)
 log_level = 'INFO'
 load_from = None
-resume_from = None
-workflow = [('train', 1)]
-cudnn_benchmark = True
+resume = False
diff --git a/configs/_base_/schedules/schedule_160k.py b/configs/_base_/schedules/schedule_160k.py
index 39630f215..0dcc52a70 100644
--- a/configs/_base_/schedules/schedule_160k.py
+++ b/configs/_base_/schedules/schedule_160k.py
@@ -1,9 +1,19 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optimizer_config = dict()
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
 lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
-# runtime settings
-runner = dict(type='IterBasedRunner', max_iters=160000)
-checkpoint_config = dict(by_epoch=False, interval=16000)
+# training schedule for 160k
+train_cfg = dict(
+    type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
 evaluation = dict(interval=16000, metric='mIoU', pre_eval=True)
+default_hooks = dict(
+    optimizer=dict(type='OptimizerHook', grad_clip=None),
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=50),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+)
diff --git a/configs/_base_/schedules/schedule_20k.py b/configs/_base_/schedules/schedule_20k.py
index 73c702197..26697c713 100644
--- a/configs/_base_/schedules/schedule_20k.py
+++ b/configs/_base_/schedules/schedule_20k.py
@@ -1,9 +1,18 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optimizer_config = dict()
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
 lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
-# runtime settings
-runner = dict(type='IterBasedRunner', max_iters=20000)
-checkpoint_config = dict(by_epoch=False, interval=2000)
+# training schedule for 20k
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
 evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
+default_hooks = dict(
+    optimizer=dict(type='OptimizerHook', grad_clip=None),
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=50),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+)
diff --git a/configs/_base_/schedules/schedule_320k.py b/configs/_base_/schedules/schedule_320k.py
index a0b230626..961fa6dc5 100644
--- a/configs/_base_/schedules/schedule_320k.py
+++ b/configs/_base_/schedules/schedule_320k.py
@@ -1,9 +1,19 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optimizer_config = dict()
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
 lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
-# runtime settings
-runner = dict(type='IterBasedRunner', max_iters=320000)
-checkpoint_config = dict(by_epoch=False, interval=32000)
+# training schedule for 320k
+train_cfg = dict(
+    type='IterBasedTrainLoop', max_iters=320000, val_interval=32000)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
 evaluation = dict(interval=32000, metric='mIoU')
+default_hooks = dict(
+    optimizer=dict(type='OptimizerHook', grad_clip=None),
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=50),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+)
diff --git a/configs/_base_/schedules/schedule_40k.py b/configs/_base_/schedules/schedule_40k.py
index d2c502325..5ce4ff170 100644
--- a/configs/_base_/schedules/schedule_40k.py
+++ b/configs/_base_/schedules/schedule_40k.py
@@ -1,9 +1,18 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optimizer_config = dict()
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
 lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
-# runtime settings
-runner = dict(type='IterBasedRunner', max_iters=40000)
-checkpoint_config = dict(by_epoch=False, interval=4000)
+# training schedule for 40k
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
 evaluation = dict(interval=4000, metric='mIoU', pre_eval=True)
+default_hooks = dict(
+    optimizer=dict(type='OptimizerHook', grad_clip=None),
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=50),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+)
diff --git a/configs/_base_/schedules/schedule_80k.py b/configs/_base_/schedules/schedule_80k.py
index 8365a878e..bb92915ce 100644
--- a/configs/_base_/schedules/schedule_80k.py
+++ b/configs/_base_/schedules/schedule_80k.py
@@ -1,9 +1,18 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optimizer_config = dict()
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
 lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
-# runtime settings
-runner = dict(type='IterBasedRunner', max_iters=80000)
-checkpoint_config = dict(by_epoch=False, interval=8000)
+# training schedule for 80k
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
 evaluation = dict(interval=8000, metric='mIoU', pre_eval=True)
+default_hooks = dict(
+    optimizer=dict(type='OptimizerHook', grad_clip=None),
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=50),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+)
diff --git a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py
index 08ca8ab78..0fb2c280c 100644
--- a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py
+++ b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py
@@ -12,7 +12,11 @@ optimizer = dict(
     type='AdamW',
     lr=3e-5,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     constructor='LayerDecayOptimizerConstructor',
     paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.9))

diff --git a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
index d41a74614..827a32850 100644
--- a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
+++ b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
@@ -26,10 +26,13 @@ optimizer = dict(
     type='AdamW',
     lr=2e-5,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     constructor='LayerDecayOptimizerConstructor',
     paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
-
 lr_config = dict(
     _delete_=True,
     policy='poly',
diff --git a/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
index b1e1c3e86..f83e6c9d9 100644
--- a/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
+++ b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
@@ -15,4 +15,8 @@ model = dict(
         dict(in_channels=512, channels=256, num_classes=171),
     ])
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.005)
+optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py
index 198be7bd1..16b202e50 100644
--- a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py
@@ -4,7 +4,8 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.025)
+optimizer = dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
 val_dataloader = dict(batch_size=4, num_workers=4)
 test_dataloader = val_dataloader
diff --git a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py
index 0e694afa9..53d564510 100644
--- a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py
@@ -9,7 +9,8 @@ model = dict(
         init_cfg=dict(
             type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.025)
+optimizer = dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
 val_dataloader = dict(batch_size=4, num_workers=4)
 test_dataloader = val_dataloader
diff --git a/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
index 78d7fea62..ef07f8a77 100644
--- a/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
+++ b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
@@ -10,4 +10,5 @@ model = dict(
         dict(num_classes=171),
     ])
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.005)
+optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
diff --git a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py
index 4a979f26a..b90edb699 100644
--- a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py
@@ -35,7 +35,8 @@ model = dict(
             concat_input=False),
     ])
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.05)
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
 val_dataloader = dict(batch_size=4, num_workers=4)
 test_dataloader = val_dataloader
diff --git a/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
index dbbccc69d..da4852b10 100644
--- a/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
+++ b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
@@ -15,4 +15,5 @@ model = dict(
         dict(in_channels=512, channels=256, num_classes=171),
     ])
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.005)
+optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
diff --git a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py
index 0a502a99b..5ef4e9438 100644
--- a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py
@@ -4,7 +4,8 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.05)
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
 val_dataloader = dict(batch_size=4, num_workers=4)
 test_dataloader = val_dataloader
diff --git a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
index 287146bc8..06bc41d58 100644
--- a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
@@ -4,7 +4,8 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.05)
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=8, num_workers=4)
 val_dataloader = dict(batch_size=8, num_workers=4)
 test_dataloader = val_dataloader
diff --git a/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py
index 0196214b7..5ecc00bc6 100644
--- a/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py
@@ -1,5 +1,5 @@
 _base_ = './bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py'
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
+default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
index 75fc49897..d945b0ccd 100644
--- a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
@@ -3,9 +3,68 @@ _base_ = [
     '../_base_/datasets/cityscapes_1024x1024.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
-sampler = dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    decode_head=dict(
+        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)),
+    auxiliary_head=[
+        dict(
+            type='FCNHead',
+            in_channels=16,
+            channels=16,
+            num_convs=2,
+            num_classes=19,
+            in_index=1,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+        dict(
+            type='FCNHead',
+            in_channels=32,
+            channels=64,
+            num_convs=2,
+            num_classes=19,
+            in_index=2,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+        dict(
+            type='FCNHead',
+            in_channels=64,
+            channels=256,
+            num_convs=2,
+            num_classes=19,
+            in_index=3,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+        dict(
+            type='FCNHead',
+            in_channels=128,
+            channels=1024,
+            num_convs=2,
+            num_classes=19,
+            in_index=4,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    ],
+)
 lr_config = dict(warmup='linear', warmup_iters=1000)
-optimizer = dict(lr=0.05)
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
 val_dataloader = dict(batch_size=4, num_workers=4)
 test_dataloader = val_dataloader
diff --git a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
index 83088b5dc..45c43d4fb 100644
--- a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
+++ b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
@@ -2,12 +2,16 @@ _base_ = ['../_base_/models/cgnet.py', '../_base_/default_runtime.py']

 # optimizer
 optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005)
-optimizer_config = dict()
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
 lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 # runtime settings
 total_iters = 60000
-checkpoint_config = dict(by_epoch=False, interval=4000)
+train_cfg = dict(
+    type='IterBasedTrainLoop', max_iters=total_iters, val_interval=4000)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(checkpoint=dict(by_epoch=False, interval=4000))
 evaluation = dict(interval=4000, metric='mIoU')

 # dataset settings
diff --git a/configs/cgnet/cgnet_680x680_60k_cityscapes.py b/configs/cgnet/cgnet_680x680_60k_cityscapes.py
index ffc96bdfa..03229dd3e 100644
--- a/configs/cgnet/cgnet_680x680_60k_cityscapes.py
+++ b/configs/cgnet/cgnet_680x680_60k_cityscapes.py
@@ -5,12 +5,16 @@ _base_ = [

 # optimizer
 optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005)
-optimizer_config = dict()
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
 lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
 # runtime settings
 total_iters = 60000
-checkpoint_config = dict(by_epoch=False, interval=4000)
+train_cfg = dict(
+    type='IterBasedTrainLoop', max_iters=total_iters, val_interval=4000)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(checkpoint=dict(by_epoch=False, interval=4000))
 evaluation = dict(interval=4000, metric='mIoU')

 img_norm_cfg = dict(
diff --git a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
index 26e669648..3c0ba2a52 100644
--- a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
@@ -10,17 +10,21 @@ model = dict(
 )

 optimizer = dict(
-    constructor='LearningRateDecayOptimizerConstructor',
     _delete_=True,
     type='AdamW',
     lr=0.0001,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
-    })
+    },
+    constructor='LearningRateDecayOptimizerConstructor')

 lr_config = dict(
     _delete_=True,
@@ -37,6 +41,7 @@ train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=2)
 test_dataloader = val_dataloader
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
+default_hooks = dict(
+    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
index 760cde32d..3dc5c8ac5 100644
--- a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
@@ -25,17 +25,21 @@ model = dict(
 )

 optimizer = dict(
-    constructor='LearningRateDecayOptimizerConstructor',
     _delete_=True,
     type='AdamW',
     lr=0.0001,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
-    })
+    },
+    constructor='LearningRateDecayOptimizerConstructor')

 lr_config = dict(
     _delete_=True,
@@ -52,6 +56,7 @@ train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=2)
 test_dataloader = val_dataloader
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
+default_hooks = dict(
+    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
index 6bef56185..f903ace85 100644
--- a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
@@ -25,17 +25,21 @@ model = dict(
 )

 optimizer = dict(
-    constructor='LearningRateDecayOptimizerConstructor',
     _delete_=True,
     type='AdamW',
     lr=0.0001,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
-    })
+    },
+    constructor='LearningRateDecayOptimizerConstructor')

 lr_config = dict(
     _delete_=True,
@@ -52,6 +56,7 @@ train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=2)
 test_dataloader = val_dataloader
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
+default_hooks = dict(
+    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
index f124888c3..b17f8694c 100644
--- a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
@@ -24,17 +24,21 @@ model = dict(
 )

 optimizer = dict(
-    constructor='LearningRateDecayOptimizerConstructor',
     _delete_=True,
     type='AdamW',
     lr=0.0001,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
-    })
+    },
+    constructor='LearningRateDecayOptimizerConstructor')

 lr_config = dict(
     _delete_=True,
@@ -51,6 +55,7 @@ train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=2)
 test_dataloader = val_dataloader
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
+default_hooks = dict(
+    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
index 75e24842e..981d974d0 100644
--- a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
@@ -24,17 +24,21 @@ model = dict(
 )

 optimizer = dict(
-    constructor='LearningRateDecayOptimizerConstructor',
     _delete_=True,
     type='AdamW',
     lr=0.0001,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 6
-    })
+    },
+    constructor='LearningRateDecayOptimizerConstructor')

 lr_config = dict(
     _delete_=True,
@@ -51,6 +55,7 @@ train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=2)
 test_dataloader = val_dataloader
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
+default_hooks = dict(
+    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
index 31899793c..e6de67527 100644
--- a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
@@ -25,17 +25,21 @@ model = dict(
 )

 optimizer = dict(
-    constructor='LearningRateDecayOptimizerConstructor',
     _delete_=True,
     type='AdamW',
     lr=0.00008,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
-    })
+    },
+    constructor='LearningRateDecayOptimizerConstructor')

 lr_config = dict(
     _delete_=True,
@@ -52,6 +56,7 @@ train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=2)
 test_dataloader = val_dataloader
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
+default_hooks = dict(
+    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py
index e32610966..f5f3a1878 100644
--- a/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,5 @@
 _base_ = './deeplabv3_r101-d8_512x1024_80k_cityscapes.py'
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
+default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
index fc369405d..a04393867 100644
--- a/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,5 @@
 _base_ = './deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py'
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
+default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
index 907bfcbbb..d1f3d3741 100644
--- a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
+++ b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
@@ -10,7 +10,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'pos_embed': dict(decay_mult=0.),
diff --git a/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py
index c6739d952..fdfcb6d13 100644
--- a/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,5 @@
 _base_ = './fcn_r101-d8_512x1024_80k_cityscapes.py'
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
+default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index d906b991b..60641dd3b 100644
--- a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -79,7 +79,12 @@ model = dict(

 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+default_hooks = dict(
+    optimizer=dict(
+        _delete_=True,
+        type='OptimizerHook',
+        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 lr_config = dict(
     _delete_=True,
diff --git a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 1f83f095c..8537f0828 100644
--- a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -79,7 +79,12 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+default_hooks = dict(
+    optimizer=dict(
+        _delete_=True,
+        type='OptimizerHook',
+        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 lr_config = dict(
     _delete_=True,
diff --git a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index a4e7bca10..b80e4c9a2 100644
--- a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -78,7 +78,12 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+default_hooks = dict(
+    optimizer=dict(
+        _delete_=True,
+        type='OptimizerHook',
+        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 lr_config = dict(
     _delete_=True,
diff --git a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 90a06aa1a..45501e7fb 100644
--- a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -79,7 +79,12 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+default_hooks = dict(
+    optimizer=dict(
+        _delete_=True,
+        type='OptimizerHook',
+        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 lr_config = dict(
     _delete_=True,
diff --git a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
index 0d3294c89..f31882bb3 100644
--- a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
@@ -36,14 +36,23 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.0005,
+    weight_decay=0.0005)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'absolute_pos_embed': dict(decay_mult=0.),
             'relative_position_bias_table': dict(decay_mult=0.),
             'norm': dict(decay_mult=0.)
         }))
-optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+
+default_hooks = dict(
+    optimizer=dict(
+        _delete_=True,
+        type='OptimizerHook',
+        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 lr_config = dict(
     _delete_=True,
diff --git a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
index 0bfe224c5..d9592d13e 100644
--- a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
+++ b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
@@ -27,9 +27,13 @@ optimizer = dict(
     type='AdamW',
     lr=1e-4,
     betas=(0.9, 0.999),
-    weight_decay=0.05,
-    constructor='LayerDecayOptimizerConstructor',
-    paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65))
+    weight_decay=0.05)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
+    paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65),
+    constructor='LayerDecayOptimizerConstructor')

 lr_config = dict(
     _delete_=True,
diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py
index 3dd70b74a..f0bd36240 100644
--- a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py
+++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py
@@ -3,5 +3,6 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
-optimizer = dict(lr=0.02)
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 lr_config = dict(min_lr=2e-4)
diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py
index 33d96c76f..d7932189f 100644
--- a/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py
+++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py
@@ -3,5 +3,6 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
 ]
 model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
-optimizer = dict(lr=0.02)
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 lr_config = dict(min_lr=2e-4)
diff --git a/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py
index c71b7f638..604eb1b73 100644
--- a/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,5 @@
 _base_ = './pspnet_r101-d8_512x1024_80k_cityscapes.py'
 # fp16 settings
-optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
+default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
 # fp16 placeholder
 fp16 = dict()
diff --git a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
index 028387621..c96975784 100644
--- a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
@@ -13,7 +13,12 @@ model = dict(
         strides=(1, 2, 2, 2)))

 optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05)
-optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+default_hooks = dict(
+    optimizer=dict(
+        _delete_=True,
+        type='OptimizerHook',
+        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 lr_config = dict(
     _delete_=True,
diff --git a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
index a8a80bff1..3bd9c16dc 100644
--- a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
@@ -11,7 +11,12 @@ model = dict(
             type='Pretrained', prefix='backbone.', checkpoint=checkpoint)))

 optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05)
-optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+default_hooks = dict(
+    optimizer=dict(
+        _delete_=True,
+        type='OptimizerHook',
+        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 lr_config = dict(
     _delete_=True,
diff --git a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
index d0551f115..ffe4a9830 100644
--- a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
+++ b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
@@ -12,7 +12,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'pos_block': dict(decay_mult=0.),
diff --git a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
index 3b87b2e5a..3a007f84b 100644
--- a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
+++ b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
@@ -15,14 +15,17 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'pos_block': dict(decay_mult=0.),
             'norm': dict(decay_mult=0.),
             'head': dict(lr_mult=10.)
         }))
-
 lr_config = dict(
     _delete_=True,
     policy='poly',
diff --git a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
index d966e113d..2bdd8eac5 100644
--- a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
+++ b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
@@ -23,7 +23,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'absolute_pos_embed': dict(decay_mult=0.),
diff --git a/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py b/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py
index c888b921c..29edb8b9d 100644
--- a/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py
+++ b/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py
@@ -9,7 +9,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(custom_keys={
         'pos_block': dict(decay_mult=0.),
         'norm': dict(decay_mult=0.)
diff --git a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
index 8d2848262..96ff5e865 100644
--- a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
@@ -24,7 +24,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(custom_keys={
         'pos_block': dict(decay_mult=0.),
         'norm': dict(decay_mult=0.)
diff --git a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
index 3acb9abb9..03874f513 100644
--- a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
@@ -17,7 +17,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'pos_embed': dict(decay_mult=0.),
diff --git a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
index 821eb0619..416927e44 100644
--- a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
@@ -16,7 +16,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'pos_embed': dict(decay_mult=0.),
diff --git a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
index d83d8ac75..e016cdaf3 100644
--- a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
@@ -16,7 +16,11 @@ optimizer = dict(
     type='AdamW',
     lr=0.00006,
     betas=(0.9, 0.999),
-    weight_decay=0.01,
+    weight_decay=0.01)
+
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
     paramwise_cfg=dict(
         custom_keys={
             'pos_embed': dict(decay_mult=0.),