From 4062e90206378ba7feb099366d2b08119caffecf Mon Sep 17 00:00:00 2001
From: "limengzhang.vendor"
Date: Thu, 16 Jun 2022 13:24:19 +0000
Subject: [PATCH] [Fix] Remove OptimizerHook and Add it in OptimWrapper

---
 configs/_base_/schedules/schedule_160k.py | 3 +--
 configs/_base_/schedules/schedule_20k.py | 3 +--
 configs/_base_/schedules/schedule_320k.py | 3 +--
 configs/_base_/schedules/schedule_40k.py | 3 +--
 configs/_base_/schedules/schedule_80k.py | 3 +--
 ...pernet_beit-large_fp16_8x1_640x640_160k_ade20k.py | 10 ++++------
 ...senetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py | 10 ++++++----
 ...upernet_convnext_base_fp16_512x512_160k_ade20k.py | 11 ++++-------
 ...upernet_convnext_base_fp16_640x640_160k_ade20k.py | 11 ++++-------
 ...pernet_convnext_large_fp16_640x640_160k_ade20k.py | 11 ++++-------
 ...pernet_convnext_small_fp16_512x512_160k_ade20k.py | 11 ++++-------
 ...upernet_convnext_tiny_fp16_512x512_160k_ade20k.py | 11 ++++-------
 ...ernet_convnext_xlarge_fp16_640x640_160k_ade20k.py | 11 ++++-------
 ...deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py | 10 ++++++----
 ...labv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py | 10 ++++++----
 .../fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py | 10 ++++++----
 ..._deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py | 11 +++++------
 ...net_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py | 12 ++++++------
 ..._s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py | 11 +++++------
 ...s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py | 11 +++++------
 ...s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py | 9 +++------
 .../pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py | 10 ++++++----
 ...d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py | 11 +++++------
 ...-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py | 11 +++++------
 24 files changed, 97 insertions(+), 120 deletions(-)

diff --git a/configs/_base_/schedules/schedule_160k.py b/configs/_base_/schedules/schedule_160k.py
index 1ac73e3c2..1055958a3 100644
--- a/configs/_base_/schedules/schedule_160k.py
+++ b/configs/_base_/schedules/schedule_160k.py
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -17,7 +17,6 @@ train_cfg = dict(
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
diff --git a/configs/_base_/schedules/schedule_20k.py b/configs/_base_/schedules/schedule_20k.py
index 97535a00c..b4bc083de 100644
--- a/configs/_base_/schedules/schedule_20k.py
+++ b/configs/_base_/schedules/schedule_20k.py
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -16,7 +16,6 @@ train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
diff --git a/configs/_base_/schedules/schedule_320k.py b/configs/_base_/schedules/schedule_320k.py
index fff100ed2..1c6f3500e 100644
--- a/configs/_base_/schedules/schedule_320k.py
+++ b/configs/_base_/schedules/schedule_320k.py
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -17,7 +17,6 @@ train_cfg = dict(
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
diff --git a/configs/_base_/schedules/schedule_40k.py b/configs/_base_/schedules/schedule_40k.py
index bfad0c345..d768012e8 100644
--- a/configs/_base_/schedules/schedule_40k.py
+++ b/configs/_base_/schedules/schedule_40k.py
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -16,7 +16,6 @@ train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
diff --git a/configs/_base_/schedules/schedule_80k.py b/configs/_base_/schedules/schedule_80k.py
index 3d8401b20..170a2ecb8 100644
--- a/configs/_base_/schedules/schedule_80k.py
+++ b/configs/_base_/schedules/schedule_80k.py
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -16,7 +16,6 @@ train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
diff --git a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
index ca571ee8d..b1084f3bc 100644
--- a/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
+++ b/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py
@@ -31,10 +31,12 @@ optimizer = dict(
     weight_decay=0.05)
 
 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     constructor='LayerDecayOptimizerConstructor',
-    paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
+    paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95),
+    accumulative_counts=2)
 
 param_scheduler = [
     dict(
@@ -52,7 +54,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=1)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-optimizer_config = dict(
-    type='GradientCumulativeFp16OptimizerHook', cumulative_iters=2)
-
-fp16 = dict()
diff --git a/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py b/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py
index 5ecc00bc6..4e04f49c5 100644
--- a/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py
+++ b/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py
@@ -1,5 +1,7 @@
 _base_ = './bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
diff --git a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
index ecb670dbd..5571c5c66 100644
--- a/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py
@@ -19,14 +19,16 @@ optimizer = dict(
     weight_decay=0.05)
 
 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
 
 param_scheduler = [
     dict(
@@ -45,8 +47,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
index 16814b65a..096c71960 100644
--- a/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py
@@ -34,14 +34,16 @@ optimizer = dict(
     weight_decay=0.05)
 
 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
 
 param_scheduler = [
     dict(
@@ -60,8 +62,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
index c2a5545cc..75907bca2 100644
--- a/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py
@@ -34,14 +34,16 @@ optimizer = dict(
     weight_decay=0.05)
 
 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
 
 param_scheduler = [
     dict(
@@ -60,8 +62,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
index 3d4746e10..793086198 100644
--- a/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py
@@ -33,14 +33,16 @@ optimizer = dict(
     weight_decay=0.05)
 
 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
 
 param_scheduler = [
     dict(
@@ -59,8 +61,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
index 99f8a6f9d..f872accf1 100644
--- a/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py
@@ -33,14 +33,16 @@ optimizer = dict(
     weight_decay=0.05)
 
 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 6
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
 
 param_scheduler = [
     dict(
@@ -59,8 +61,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
diff --git a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
index 59d8cd2a6..65a14f5a9 100644
--- a/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
+++ b/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py
@@ -34,14 +34,16 @@ optimizer = dict(
     weight_decay=0.05)
 
 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
 
 param_scheduler = [
     dict(
@@ -60,8 +62,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
diff --git a/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py
index f5f3a1878..096c55b64 100644
--- a/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,7 @@
 _base_ = './deeplabv3_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
index a04393867..f92cf030e 100644
--- a/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,7 @@
 _base_ = './deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
diff --git a/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py
index fdfcb6d13..769b48fe9 100644
--- a/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,7 @@
 _base_ = './fcn_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
diff --git a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index fe9baf5d9..b2936570f 100644
--- a/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -81,12 +81,11 @@ model = dict(
 
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
diff --git a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index d2665f7ab..7cb59abae 100644
--- a/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -81,12 +81,12 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
+
 # learning policy
 param_scheduler = [
     dict(
diff --git a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 311571f20..aef80e011 100644
--- a/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -80,12 +80,11 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
diff --git a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
index 75129131b..a852ffbd9 100644
--- a/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py
@@ -81,12 +81,11 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
diff --git a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
index 7bf81a8e6..2dac93ec8 100644
--- a/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
+++ b/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py
@@ -39,6 +39,7 @@ optimizer = dict(
     weight_decay=0.0005)
 
 optim_wrapper = dict(
+    _delete_=True,
     type='OptimWrapper',
     optimizer=optimizer,
     paramwise_cfg=dict(
@@ -46,13 +47,9 @@ optim_wrapper = dict(
             'absolute_pos_embed': dict(decay_mult=0.),
             'relative_position_bias_table': dict(decay_mult=0.),
             'norm': dict(decay_mult=0.)
-        }))
+        }),
+    clip_grad=dict(max_norm=1, norm_type=2))
 
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 param_scheduler = [
     dict(
diff --git a/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py b/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py
index 604eb1b73..3cf1ed35a 100644
--- a/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -1,5 +1,7 @@
 _base_ = './pspnet_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
diff --git a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
index 74d38afea..a36688684 100644
--- a/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
@@ -16,12 +16,11 @@ model = dict(
         strides=(1, 2, 2, 2)))
 
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
diff --git a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
index fc138b775..3d8833155 100644
--- a/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
+++ b/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py
@@ -14,12 +14,11 @@ model = dict(
             type='Pretrained', prefix='backbone.', checkpoint=checkpoint)))
 
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
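
The diffs above all apply the same migration: optimizer-related hook settings move from default_hooks onto the optimizer wrapper. A minimal sketch of the two resulting config shapes, using only fields that appear in this patch (the concrete values are illustrative, not taken from any single config):

# Gradient clipping moves from OptimizerHook.grad_clip in default_hooks
# to clip_grad on the optimizer wrapper.
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.0005)
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=optimizer,
    clip_grad=dict(max_norm=1, norm_type=2))

# Mixed-precision training replaces Fp16OptimizerHook / the fp16 placeholder
# (and GradientCumulativeFp16OptimizerHook) with AmpOptimWrapper; loss_scale
# and accumulative_counts carry over the old loss_scale / cumulative_iters.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=optimizer,
    loss_scale=512.,
    accumulative_counts=2)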