diff --git a/configs/_base_/schedules/schedule_160k.py b/configs/_base_/schedules/schedule_160k.py
index 0dcc52a70..1ac73e3c2 100644
--- a/configs/_base_/schedules/schedule_160k.py
+++ b/configs/_base_/schedules/schedule_160k.py
@@ -2,13 +2,20 @@
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
-lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=0,
+        end=160000,
+        by_epoch=False)
+]
 # training schedule for 160k
 train_cfg = dict(
     type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
-evaluation = dict(interval=16000, metric='mIoU', pre_eval=True)
 default_hooks = dict(
     optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
diff --git a/configs/_base_/schedules/schedule_20k.py b/configs/_base_/schedules/schedule_20k.py
index 26697c713..97535a00c 100644
--- a/configs/_base_/schedules/schedule_20k.py
+++ b/configs/_base_/schedules/schedule_20k.py
@@ -2,12 +2,19 @@
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
-lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=0,
+        end=20000,
+        by_epoch=False)
+]
 # training schedule for 20k
 train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
-evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
 default_hooks = dict(
     optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
diff --git a/configs/_base_/schedules/schedule_320k.py b/configs/_base_/schedules/schedule_320k.py
index 961fa6dc5..fff100ed2 100644
--- a/configs/_base_/schedules/schedule_320k.py
+++ b/configs/_base_/schedules/schedule_320k.py
@@ -2,13 +2,20 @@
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
-lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=0,
+        end=320000,
+        by_epoch=False)
+]
 # training schedule for 320k
 train_cfg = dict(
     type='IterBasedTrainLoop', max_iters=320000, val_interval=32000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
-evaluation = dict(interval=32000, metric='mIoU')
 default_hooks = dict(
     optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
diff --git a/configs/_base_/schedules/schedule_40k.py b/configs/_base_/schedules/schedule_40k.py
index 5ce4ff170..bfad0c345 100644
--- a/configs/_base_/schedules/schedule_40k.py
+++ b/configs/_base_/schedules/schedule_40k.py
@@ -2,12 +2,19 @@
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
-lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=0,
+        end=40000,
+        by_epoch=False)
+]
 # training schedule for 40k
 train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
-evaluation = dict(interval=4000, metric='mIoU', pre_eval=True)
 default_hooks = dict(
     optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
diff --git a/configs/_base_/schedules/schedule_80k.py b/configs/_base_/schedules/schedule_80k.py
index bb92915ce..3d8401b20 100644
--- a/configs/_base_/schedules/schedule_80k.py
+++ b/configs/_base_/schedules/schedule_80k.py
@@ -2,12 +2,19 @@
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
-lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=0,
+        end=80000,
+        by_epoch=False)
+]
 # training schedule for 80k
 train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
-evaluation = dict(interval=8000, metric='mIoU', pre_eval=True)
 default_hooks = dict(
     optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
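All five base schedules swap the one-line `lr_config` for a `param_scheduler` list: `min_lr` becomes `eta_min`, and `begin`/`end` pin the scheduler to the iteration range of the run. A minimal sketch of the polynomial decay both notations describe, assuming the standard `(1 - t/T) ** power` interpolation between the base lr and `eta_min` (the helper name `poly_lr` is made up for illustration, not an MMEngine API):

```python
# Pure-Python sketch of the poly decay behind both the old
# `lr_config = dict(policy='poly', ...)` and the new `PolyLR` entry.
def poly_lr(base_lr, it, max_iters, power=0.9, eta_min=1e-4):
    """LR at iteration `it` for a poly schedule running over `max_iters`."""
    coeff = (1 - it / max_iters) ** power
    return (base_lr - eta_min) * coeff + eta_min

# The 160k schedule starts at lr=0.01 and decays toward eta_min=1e-4.
assert abs(poly_lr(0.01, 0, 160000) - 0.01) < 1e-12
assert abs(poly_lr(0.01, 160000, 160000) - 1e-4) < 1e-12
print(poly_lr(0.01, 80000, 160000))  # ~0.0054 at the halfway point
```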
diff --git a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py
index 0fb2c280c..1ff26c1ae 100644
--- a/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py
+++ b/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py
@@ -20,15 +20,18 @@ optim_wrapper = dict(
     constructor='LayerDecayOptimizerConstructor',
     paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.9))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
index 827a32850..d3c0a15b6 100644
--- a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
+++ b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
@@ -33,15 +33,19 @@ optim_wrapper = dict(
     optimizer=optimizer,
     constructor='LayerDecayOptimizerConstructor',
     paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=3000,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=3000),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=3000,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 train_dataloader = dict(batch_size=1)
 val_dataloader = dict(batch_size=1)
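The single `lr_config` with warmup keys unrolls into two chained schedulers: a `LinearLR` that owns iterations `[0, warmup_iters)` and a `PolyLR` that takes over until `max_iters`, with `warmup_ratio` mapping onto `start_factor`. A plain-Python imitation of how the pair is meant to compose (a sketch for intuition, not MMEngine code; the base lr is illustrative):

```python
# Sketch of the chained LinearLR + PolyLR behaviour: LinearLR ramps the
# lr factor from start_factor to 1 over [0, warmup_end), then PolyLR
# decays toward eta_min over [warmup_end, max_iters).
def chained_lr(base_lr, it, warmup_end=1500, max_iters=160000,
               start_factor=1e-6, power=1.0, eta_min=0.0):
    if it < warmup_end:
        factor = start_factor + (1 - start_factor) * it / warmup_end
        return base_lr * factor
    frac = (it - warmup_end) / (max_iters - warmup_end)
    return (base_lr - eta_min) * (1 - frac) ** power + eta_min

print(chained_lr(0.01, 0))     # 1e-8: the warmup floor (0.01 * 1e-6)
print(chained_lr(0.01, 1500))  # 0.01: full base lr once warmup ends
```

The BiSeNet, STDC, and PointRend configs below use `start_factor=0.1` instead, which matches the old implicit default `warmup_ratio=0.1`.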
diff --git a/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
index f83e6c9d9..3b53dc6a8 100644
--- a/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
+++ b/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
@@ -14,7 +14,17 @@ model = dict(
         dict(in_channels=512, channels=256, num_classes=171),
         dict(in_channels=512, channels=256, num_classes=171),
     ])
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
diff --git a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py
index 16b202e50..4dbd2eb87 100644
--- a/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py
@@ -3,7 +3,17 @@ _base_ = [
     '../_base_/datasets/cityscapes_1024x1024.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
diff --git a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py
index 53d564510..ba92ba162 100644
--- a/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py
@@ -8,7 +8,17 @@ model = dict(
         backbone_cfg=dict(
             init_cfg=dict(
                 type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))))
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
diff --git a/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
index ef07f8a77..44c75225c 100644
--- a/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
+++ b/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
@@ -9,6 +9,19 @@ model = dict(
         dict(num_classes=171),
         dict(num_classes=171),
     ])
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py
index b90edb699..26e05303a 100644
--- a/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py
@@ -34,7 +34,17 @@ model = dict(
             norm_cfg=norm_cfg,
             concat_input=False),
     ])
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
diff --git a/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
index da4852b10..93fbbbead 100644
--- a/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
+++ b/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py
@@ -14,6 +14,19 @@ model = dict(
         dict(in_channels=512, channels=256, num_classes=171),
         dict(in_channels=512, channels=256, num_classes=171),
     ])
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+train_dataloader = dict(batch_size=4, num_workers=4)
+val_dataloader = dict(batch_size=4, num_workers=4)
+test_dataloader = val_dataloader
diff --git a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py
index 5ef4e9438..b2988f744 100644
--- a/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py
@@ -3,7 +3,17 @@ _base_ = [
     '../_base_/datasets/cityscapes_1024x1024.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
diff --git a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
index 06bc41d58..92d9914a8 100644
--- a/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py
@@ -3,7 +3,17 @@ _base_ = [
     '../_base_/datasets/cityscapes_1024x1024.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
 ]
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=8, num_workers=4)
diff --git a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
index d945b0ccd..123e030a1 100644
--- a/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py
@@ -62,7 +62,17 @@ models = dict(
                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
     ],
 )
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
 optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 train_dataloader = dict(batch_size=4, num_workers=4)
diff --git a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
index 45c43d4fb..b838b813f 100644
--- a/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
+++ b/configs/cgnet/cgnet_512x1024_60k_cityscapes.py
@@ -4,7 +4,15 @@ _base_ = ['../_base_/models/cgnet.py', '../_base_/default_runtime.py']
 optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
-lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        by_epoch=False,
+        begin=0,
+        end=60000)
+]
 # runtime settings
 total_iters = 60000
 train_cfg = dict(
@@ -12,7 +20,6 @@ train_cfg = dict(
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(checkpoint=dict(by_epoch=False, interval=4000))
-evaluation = dict(interval=4000, metric='mIoU')

 # dataset settings
 dataset_type = 'CityscapesDataset'
diff --git a/configs/cgnet/cgnet_680x680_60k_cityscapes.py b/configs/cgnet/cgnet_680x680_60k_cityscapes.py
index 03229dd3e..24e873e52 100644
--- a/configs/cgnet/cgnet_680x680_60k_cityscapes.py
+++ b/configs/cgnet/cgnet_680x680_60k_cityscapes.py
@@ -7,7 +7,15 @@
 optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
 # learning policy
-lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        by_epoch=False,
+        begin=0,
+        end=60000)
+]
 # runtime settings
 total_iters = 60000
 train_cfg = dict(
@@ -15,7 +23,6 @@ train_cfg = dict(
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(checkpoint=dict(by_epoch=False, interval=4000))
-evaluation = dict(interval=4000, metric='mIoU')

 img_norm_cfg = dict(
     mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True)
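The deleted `evaluation = dict(interval=..., metric='mIoU')` lines are not lost: the interval already lives in `train_cfg(val_interval=...)`, and in the new-style layout the metric is declared as an evaluator, typically in the dataset base config. A sketch of the counterpart (the field names follow mmseg 1.x's `IoUMetric`, but treat the exact placement as an assumption of this sketch):

```python
# Where the old `evaluation = dict(interval=16000, metric='mIoU')` went:
# the interval is carried by `train_cfg.val_interval` (kept above), and
# the metric becomes an evaluator, usually in the dataset base config.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
```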
diff --git a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
index 3c0ba2a52..6a0963e14 100644
--- a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
@@ -26,15 +26,18 @@
     },
     constructor='LearningRateDecayOptimizerConstructor')

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
index 3dc5c8ac5..f0abb7231 100644
--- a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
@@ -41,15 +41,18 @@
     },
     constructor='LearningRateDecayOptimizerConstructor')

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
index f903ace85..656cb1281 100644
--- a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
@@ -41,15 +41,18 @@
     },
     constructor='LearningRateDecayOptimizerConstructor')

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
index b17f8694c..b06e819a6 100644
--- a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
@@ -40,15 +40,18 @@
     },
     constructor='LearningRateDecayOptimizerConstructor')

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
index 981d974d0..797026779 100644
--- a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
@@ -40,15 +40,18 @@
     },
     constructor='LearningRateDecayOptimizerConstructor')

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
index e6de67527..9e125fde9 100644
--- a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
@@ -41,15 +41,18 @@
     },
     constructor='LearningRateDecayOptimizerConstructor')

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
index d1f3d3741..d8a0e4620 100644
--- a/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
+++ b/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
@@ -22,15 +22,18 @@ optim_wrapper = dict(
             'norm': dict(decay_mult=0.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2, num_workers=2)
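The BEiT, ConvNeXt, MAE, and DPT configs keep their layer-decay optimizer constructors (`LayerDecayOptimizerConstructor`, `LearningRateDecayOptimizerConstructor`) untouched; only the scheduler block changes. For intuition, the usual layer-decay rule scales each block's learning rate by a power of `layer_decay_rate` based on its depth from the top, so the embedding trains slowest and the head at the full rate. A rough illustration under that assumed rule, not the constructor's actual code:

```python
# Illustrative layer-wise lr scales for
# paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.9), assuming the
# common rule scale_i = rate ** (num_layers + 1 - i), with layer 0 the
# patch embedding and layer num_layers + 1 the decode head.
def layer_lr_scales(num_layers=12, rate=0.9):
    return [rate ** (num_layers + 1 - i) for i in range(num_layers + 2)]

scales = layer_lr_scales()
print(round(scales[0], 3))  # ~0.254 for the patch embedding (0.9 ** 13)
print(scales[-1])           # 1.0 for the decode head
```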
diff --git a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 60641dd3b..80ca2774a 100644
--- a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -86,14 +86,18 @@ default_hooks = dict(
         type='OptimizerHook', grad_clip=dict(max_norm=1, norm_type=2)))

 # learning policy
-lr_config = dict(
-    _delete_=True,
-    policy='step',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=0.001,
-    step=[60000, 72000],
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=1000,
+        end=80000,
+        milestones=[60000, 72000],
+        by_epoch=False,
+    )
+]
 # In K-Net implementation we use batch size 2 per GPU as default
 train_dataloader = dict(batch_size=2, num_workers=2)
 val_dataloader = dict(batch_size=2, num_workers=2)
diff --git a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 8537f0828..8afb51d88 100644
--- a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -86,14 +86,18 @@ default_hooks = dict(
         type='OptimizerHook', grad_clip=dict(max_norm=1, norm_type=2)))

 # learning policy
-lr_config = dict(
-    _delete_=True,
-    policy='step',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=0.001,
-    step=[60000, 72000],
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=1000,
+        end=80000,
+        milestones=[60000, 72000],
+        by_epoch=False,
+    )
+]
 # In K-Net implementation we use batch size 2 per GPU as default
 train_dataloader = dict(batch_size=2, num_workers=2)
 val_dataloader = dict(batch_size=2, num_workers=2)
diff --git a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index b80e4c9a2..cd39bd387 100644
--- a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -85,14 +85,18 @@ default_hooks = dict(
         type='OptimizerHook', grad_clip=dict(max_norm=1, norm_type=2)))

 # learning policy
-lr_config = dict(
-    _delete_=True,
-    policy='step',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=0.001,
-    step=[60000, 72000],
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=1000,
+        end=80000,
+        milestones=[60000, 72000],
+        by_epoch=False,
+    )
+]
 # In K-Net implementation we use batch size 2 per GPU as default
 train_dataloader = dict(batch_size=2, num_workers=2)
 val_dataloader = dict(batch_size=2, num_workers=2)
diff --git a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 45501e7fb..9c5f84849 100644
--- a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -86,14 +86,18 @@ default_hooks = dict(
         type='OptimizerHook', grad_clip=dict(max_norm=1, norm_type=2)))

 # learning policy
-lr_config = dict(
-    _delete_=True,
-    policy='step',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=0.001,
-    step=[60000, 72000],
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=1000,
+        end=80000,
+        milestones=[60000, 72000],
+        by_epoch=False,
+    )
+]
 # In K-Net implementation we use batch size 2 per GPU as default
 train_dataloader = dict(batch_size=2, num_workers=2)
 val_dataloader = dict(batch_size=2, num_workers=2)
diff --git a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
index f31882bb3..fc09e7ff6 100644
--- a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
@@ -54,14 +54,18 @@ default_hooks = dict(
         type='OptimizerHook', grad_clip=dict(max_norm=1, norm_type=2)))

 # learning policy
-lr_config = dict(
-    _delete_=True,
-    policy='step',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=0.001,
-    step=[60000, 72000],
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=1000,
+        end=80000,
+        milestones=[60000, 72000],
+        by_epoch=False,
+    )
+]
 # In K-Net implementation we use batch size 2 per GPU as default
 train_dataloader = dict(batch_size=2, num_workers=2)
 val_dataloader = dict(batch_size=2, num_workers=2)
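K-Net's `policy='step'` becomes a `MultiStepLR` after the warmup segment, with `step=[60000, 72000]` mapping onto `milestones`. Since neither the old nor the new block spells out the decay factor, the sketch below assumes the conventional default `gamma=0.1` (a tenfold cut at each milestone); the base lr is illustrative:

```python
# Plain-Python sketch of the warmup + MultiStepLR pair above, assuming
# the default gamma=0.1 that the configs leave implicit.
def multistep_lr(base_lr, it, milestones=(60000, 72000), gamma=0.1):
    drops = sum(1 for m in milestones if it >= m)
    return base_lr * gamma ** drops

print(multistep_lr(1e-4, 59999))  # 1e-04, untouched before 60k iters
print(multistep_lr(1e-4, 60000))  # 1e-05 after the first milestone
print(multistep_lr(1e-4, 72000))  # 1e-06 after the second
```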
diff --git a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
index d9592d13e..7ee3d69a0 100644
--- a/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
+++ b/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py
@@ -35,15 +35,18 @@ optim_wrapper = dict(
     paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65),
     constructor='LayerDecayOptimizerConstructor')

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]

 # mixed precision
 fp16 = dict(loss_scale='dynamic')
diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py
index f0bd36240..20fd24e77 100644
--- a/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py
+++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py
@@ -5,4 +5,12 @@
 model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-lr_config = dict(min_lr=2e-4)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=2e-4,
+        power=0.9,
+        begin=0,
+        end=40000,
+        by_epoch=False)
+]
diff --git a/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py
index d7932189f..29775b50f 100644
--- a/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py
+++ b/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py
@@ -5,4 +5,12 @@
 model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005)
 optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-lr_config = dict(min_lr=2e-4)
+param_scheduler = [
+    dict(
+        type='PolyLR',
+        eta_min=2e-4,
+        power=0.9,
+        begin=0,
+        end=80000,
+        by_epoch=False)
+]
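One invariant worth checking in these hand-expanded schedules: the last scheduler's `end` should equal the loop's `max_iters` (80000 for the 80k OCRNet config above). If `end` falls short, the poly decay reaches `eta_min` early and the tail of training runs at a constant lr. Reusing the `poly_lr` sketch from earlier:

```python
# With end=40000 inside an 80000-iter run, the lr would sit at eta_min
# for the whole second half; with end=80000 it reaches eta_min only at
# the final iteration.
for it in (0, 40000, 80000):
    matched = poly_lr(0.02, it, 80000, power=0.9, eta_min=2e-4)
    truncated = poly_lr(0.02, min(it, 40000), 40000, power=0.9,
                        eta_min=2e-4)
    print(it, round(matched, 5), round(truncated, 5))
```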
diff --git a/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py
index 96cbaa48d..ed56b952c 100644
--- a/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py
+++ b/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py
@@ -2,4 +2,14 @@ _base_ = [
     '../_base_/models/pointrend_r50.py', '../_base_/datasets/cityscapes.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
 ]
-lr_config = dict(warmup='linear', warmup_iters=200)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=200),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=200,
+        end=80000,
+        by_epoch=False,
+    )
+]
diff --git a/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py
index db8c634c0..51e489cb2 100644
--- a/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py
+++ b/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py
@@ -29,4 +29,14 @@ model = dict(decode_head=[
         loss_decode=dict(
             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
 ])
-lr_config = dict(warmup='linear', warmup_iters=200)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=200),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=200,
+        end=160000,
+        by_epoch=False,
+    )
+]
diff --git a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
index c96975784..23b636bb5 100644
--- a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
@@ -20,11 +20,15 @@ default_hooks = dict(
         type='OptimizerHook', grad_clip=dict(max_norm=1, norm_type=2)))

 # learning policy
-lr_config = dict(
-    _delete_=True,
-    policy='step',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=0.001,
-    step=[60000, 72000],
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=1000,
+        end=80000,
+        by_epoch=False,
+        milestones=[60000, 72000],
+    )
+]
diff --git a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
index 3bd9c16dc..614598872 100644
--- a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
@@ -18,11 +18,15 @@ default_hooks = dict(
         type='OptimizerHook', grad_clip=dict(max_norm=1, norm_type=2)))

 # learning policy
-lr_config = dict(
-    _delete_=True,
-    policy='step',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=0.001,
-    step=[60000, 72000],
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=1000,
+        end=80000,
+        by_epoch=False,
+        milestones=[60000, 72000],
+    )
+]
diff --git a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
index ffe4a9830..697232dc5 100644
--- a/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
+++ b/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py
@@ -24,15 +24,18 @@
             'head': dict(lr_mult=10.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]
 train_dataloader = dict(batch_size=2, num_workers=2)
 val_dataloader = dict(batch_size=2, num_workers=2)
 test_dataloader = val_dataloader
diff --git a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
index 3a007f84b..c124fdfde 100644
--- a/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
+++ b/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py
@@ -26,15 +26,20 @@
             'norm': dict(decay_mult=0.),
             'head': dict(lr_mult=10.)
         }))
-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]
+
 train_dataloader = dict(batch_size=1, num_workers=1)
 val_dataloader = dict(batch_size=1, num_workers=1)
 test_dataloader = val_dataloader
diff --git a/configs/stdc/stdc1_512x1024_80k_cityscapes.py b/configs/stdc/stdc1_512x1024_80k_cityscapes.py
index 021b6a5a2..8a04cd225 100644
--- a/configs/stdc/stdc1_512x1024_80k_cityscapes.py
+++ b/configs/stdc/stdc1_512x1024_80k_cityscapes.py
@@ -2,7 +2,17 @@ _base_ = [
     '../_base_/models/stdc.py', '../_base_/datasets/cityscapes.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
 ]
-lr_config = dict(warmup='linear', warmup_iters=1000)
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=80000,
+        by_epoch=False,
+    )
+]
 train_dataloader = dict(batch_size=12, num_workers=4)
 val_dataloader = dict(batch_size=12, num_workers=4)
 test_dataloader = val_dataloader
diff --git a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
index 2bdd8eac5..0f3d28618 100644
--- a/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
+++ b/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
@@ -35,15 +35,18 @@
             'norm': dict(decay_mult=0.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py b/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py
index 29edb8b9d..81451713b 100644
--- a/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py
+++ b/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py
@@ -19,12 +19,15 @@
             'norm': dict(decay_mult=0.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]
diff --git a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
index 96ff5e865..770847ef7 100644
--- a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
@@ -34,15 +34,18 @@ optim_wrapper = dict(
             'norm': dict(decay_mult=0.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]

 train_dataloader = dict(batch_size=2, num_workers=2)
 val_dataloader = dict(batch_size=2, num_workers=2)
diff --git a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py
index c706cf354..3f501fb1a 100644
--- a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py
+++ b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py
@@ -4,4 +4,3 @@ _base_ = [
     '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py
index 0ef02dcc4..1dfd5f123 100644
--- a/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py
+++ b/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py
index 118428bc4..8492d44a7 100644
--- a/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py
+++ b/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py b/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py
index 1f8862a0e..c55521d21 100644
--- a/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py
+++ b/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py
index 2bc52d962..f4809784a 100644
--- a/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py
+++ b/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py b/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py
index 5d836c61d..f62323ec6 100644
--- a/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py
+++ b/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py
index be8eec777..7dc904414 100644
--- a/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py
+++ b/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py b/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py
index 80483ade4..3aec7c9de 100644
--- a/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py
+++ b/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py b/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py
index b085a17d6..7c9ca3479 100644
--- a/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py
+++ b/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py
@@ -4,4 +4,3 @@ _base_ = [
     '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py b/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py
index 9d729cea6..1d2ee426a 100644
--- a/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py
+++ b/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py b/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py
index f57c9166b..f1b2879b3 100644
--- a/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py
+++ b/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170)))
-evaluation = dict(metric='mDice')
diff --git a/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py b/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py
index 7b5421ad6..765b18527 100644
--- a/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py
+++ b/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py
@@ -3,4 +3,3 @@ _base_ = [
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
 ]
 model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42)))
-evaluation = dict(metric='mDice')
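The removed `evaluation = dict(metric='mDice')` overrides in the UNet configs imply the Dice metric must now come from the corresponding dataset base configs. A plausible new-style equivalent, under the same `IoUMetric` assumption as above (its exact home, the CHASE_DB1/STARE/HRF/DRIVE dataset bases, is an assumption of this sketch):

```python
# New-style counterpart of the deleted per-config override
# `evaluation = dict(metric='mDice')`.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator
```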
diff --git a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
index 03874f513..8c81bb726 100644
--- a/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
@@ -29,15 +29,18 @@ optim_wrapper = dict(
             'norm': dict(decay_mult=0.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
index 416927e44..66821e6c5 100644
--- a/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py
@@ -28,15 +28,18 @@ optim_wrapper = dict(
             'norm': dict(decay_mult=0.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)
diff --git a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
index e016cdaf3..70b8d35e9 100644
--- a/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
+++ b/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py
@@ -28,15 +28,18 @@ optim_wrapper = dict(
             'norm': dict(decay_mult=0.)
         }))

-lr_config = dict(
-    _delete_=True,
-    policy='poly',
-    warmup='linear',
-    warmup_iters=1500,
-    warmup_ratio=1e-6,
-    power=1.0,
-    min_lr=0.0,
-    by_epoch=False)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=80000,
+        by_epoch=False,
+    )
+]

 # By default, models are trained on 8 GPUs with 2 images per GPU
 train_dataloader = dict(batch_size=2)