Mirror of https://github.com/open-mmlab/mmsegmentation.git (synced 2025-06-03 22:03:48 +08:00)
Merge branch 'limengzhang/fix_optim_wrapper' into 'refactor_dev'

[Fix] Remove OptimizerHook and Add it in OptimWrapper

See merge request openmmlab-enterprise/openmmlab-ce/mmsegmentation!47

Commit 46723a9543
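Note: the hunks below apply one migration repeatedly. Work previously done by hooks (gradient clipping in OptimizerHook, loss scaling in Fp16OptimizerHook, gradient accumulation in GradientCumulativeFp16OptimizerHook) moves onto the optimizer wrapper itself. A condensed before/after sketch of all three substitutions, using only field names that appear in the diffs of this merge:

# Before: hook-based configuration (removed by this merge).
default_hooks = dict(optimizer=dict(type='OptimizerHook', grad_clip=None))
optimizer_config = dict(
    type='GradientCumulativeFp16OptimizerHook', cumulative_iters=2)
fp16 = dict()  # fp16 placeholder

# After: the same behaviour expressed on the wrapper.
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(
    type='AmpOptimWrapper',    # mixed precision, replaces Fp16OptimizerHook/fp16
    optimizer=optimizer,
    clip_grad=None,            # replaces OptimizerHook's grad_clip
    loss_scale='dynamic',      # replaces Fp16OptimizerHook's loss_scale
    accumulative_counts=2)     # replaces cumulative_iters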
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -17,7 +17,6 @@ train_cfg = dict(
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -16,7 +16,6 @@ train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -17,7 +17,6 @@ train_cfg = dict(
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -16,7 +16,6 @@ train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
@@ -1,6 +1,6 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
 # learning policy
 param_scheduler = [
     dict(
@@ -16,7 +16,6 @@ train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 default_hooks = dict(
-    optimizer=dict(type='OptimizerHook', grad_clip=None),
     timer=dict(type='IterTimerHook'),
     logger=dict(type='LoggerHook', interval=50),
     param_scheduler=dict(type='ParamSchedulerHook'),
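Note: the five schedule hunks above are behaviour-preserving: clip_grad=None disables clipping, exactly as grad_clip=None did on the removed hook. A minimal runnable sketch of what a non-None clip_grad does, assuming MMEngine's OptimWrapper API (the clip_grad kwargs are forwarded to torch.nn.utils.clip_grad_norm_):

import torch
from mmengine.optim import OptimWrapper

model = torch.nn.Linear(4, 2)
optim = torch.optim.SGD(
    model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
wrapper = OptimWrapper(optimizer=optim, clip_grad=dict(max_norm=1, norm_type=2))

loss = model(torch.randn(8, 4)).sum()
wrapper.update_params(loss)  # backward + clip + step + zero_grad in one call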
@@ -31,10 +31,12 @@ optimizer = dict(
     weight_decay=0.05)

 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     constructor='LayerDecayOptimizerConstructor',
-    paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
+    paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95),
+    accumulative_counts=2)

 param_scheduler = [
     dict(
@@ -52,7 +54,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=1)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-optimizer_config = dict(
-    type='GradientCumulativeFp16OptimizerHook', cumulative_iters=2)
-
-fp16 = dict()
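Note: here the fp16 hook's cumulative_iters=2 becomes accumulative_counts=2 on AmpOptimWrapper, so with train_dataloader batch_size=1 the effective per-GPU batch size is still 2. A sketch of the accumulation semantics as I read MMEngine's wrapper (it divides each loss by accumulative_counts and only steps every second call; a plain OptimWrapper is used so the sketch runs on CPU):

import torch
from mmengine.optim import OptimWrapper

model = torch.nn.Linear(4, 2)
wrapper = OptimWrapper(
    optimizer=torch.optim.SGD(model.parameters(), lr=0.01),
    accumulative_counts=2)

for step in range(4):
    loss = model(torch.randn(1, 4)).sum()
    wrapper.update_params(loss)  # optimizer.step() fires on steps 1 and 3 only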
@@ -1,5 +1,7 @@
 _base_ = './bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.05, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
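Note: this is the new shape of every fp16 config in this merge. My reading, stated as an assumption rather than verified against the docs of this vintage: AmpOptimWrapper drives torch.cuda.amp, loss_scale=512. pins the GradScaler's scale the way the old Fp16OptimizerHook's static loss scale did, and loss_scale='dynamic' (used in later hunks) keeps the scaler adaptive. A CUDA-only sketch:

import torch
from mmengine.optim import AmpOptimWrapper  # requires a CUDA-enabled torch

model = torch.nn.Linear(4, 2).cuda()
wrapper = AmpOptimWrapper(
    optimizer=torch.optim.SGD(model.parameters(), lr=0.05),
    loss_scale=512.)

with wrapper.optim_context(model):           # enables autocast for the forward
    loss = model(torch.randn(8, 4).cuda()).sum()
wrapper.update_params(loss)                   # scaled backward + unscale + step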
@@ -19,14 +19,16 @@ optimizer = dict(
     weight_decay=0.05)

 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')

 param_scheduler = [
     dict(
@@ -45,8 +47,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
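Note: the same two changes (retype to AmpOptimWrapper plus a trailing loss_scale='dynamic') repeat verbatim in the next five hunks; only the hunk offsets and num_layers (12, or 6 in one backbone) differ. The shared result, assembled from the diff:

optim_wrapper = dict(
    _delete_=True,                 # discard the base config's plain OptimWrapper
    type='AmpOptimWrapper',        # replaces the fp16 = dict() marker
    optimizer=optimizer,
    paramwise_cfg={
        'decay_rate': 0.9,
        'decay_type': 'stage_wise',
        'num_layers': 12           # 6 in one of the variants below
    },
    constructor='LearningRateDecayOptimizerConstructor',
    loss_scale='dynamic')          # replaces Fp16OptimizerHook(loss_scale='dynamic')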
@@ -34,14 +34,16 @@ optimizer = dict(
     weight_decay=0.05)

 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')

 param_scheduler = [
     dict(
@@ -60,8 +62,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
@@ -34,14 +34,16 @@ optimizer = dict(
     weight_decay=0.05)

 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
    optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')

 param_scheduler = [
     dict(
@@ -60,8 +62,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
@@ -33,14 +33,16 @@ optimizer = dict(
     weight_decay=0.05)

 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')

 param_scheduler = [
     dict(
@@ -59,8 +61,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
@@ -33,14 +33,16 @@ optimizer = dict(
     weight_decay=0.05)

 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 6
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')

 param_scheduler = [
     dict(
@@ -59,8 +61,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
@@ -34,14 +34,16 @@ optimizer = dict(
     weight_decay=0.05)

 optim_wrapper = dict(
-    type='OptimWrapper',
+    _delete_=True,
+    type='AmpOptimWrapper',
     optimizer=optimizer,
     paramwise_cfg={
         'decay_rate': 0.9,
         'decay_type': 'stage_wise',
         'num_layers': 12
     },
-    constructor='LearningRateDecayOptimizerConstructor')
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')

 param_scheduler = [
     dict(
@@ -60,8 +62,3 @@ param_scheduler = [
 train_dataloader = dict(batch_size=2)
 val_dataloader = dict(batch_size=1)
 test_dataloader = val_dataloader
-# fp16 settings
-default_hooks = dict(
-    optimizer=dict(type='Fp16OptimizerHook', loss_scale='dynamic'))
-# fp16 placeholder
-fp16 = dict()
@@ -1,5 +1,7 @@
 _base_ = './deeplabv3_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
@@ -1,5 +1,7 @@
 _base_ = './deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
@@ -1,5 +1,7 @@
 _base_ = './fcn_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
@@ -81,12 +81,11 @@ model = dict(

 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
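Note: this hunk and the next three are a single recurring rewrite: the hook dict under default_hooks disappears and its grad_clip moves, renamed clip_grad, onto optim_wrapper, which now carries the _delete_=True. Side by side, taken directly from the diff:

# before: clipping configured on a hook
default_hooks = dict(
    optimizer=dict(
        _delete_=True,
        type='OptimizerHook',
        grad_clip=dict(max_norm=1, norm_type=2)))

# after: clipping configured on the wrapper (note grad_clip -> clip_grad)
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=optimizer,
    clip_grad=dict(max_norm=1, norm_type=2))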
@@ -81,12 +81,12 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
+
 # learning policy
 param_scheduler = [
     dict(
@@ -80,12 +80,11 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
@@ -81,12 +81,11 @@ model = dict(
     test_cfg=dict(mode='whole'))
 # optimizer
 optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
@@ -39,6 +39,7 @@ optimizer = dict(
     weight_decay=0.0005)

 optim_wrapper = dict(
+    _delete_=True,
     type='OptimWrapper',
     optimizer=optimizer,
     paramwise_cfg=dict(
@@ -46,13 +47,9 @@ optim_wrapper = dict(
             'absolute_pos_embed': dict(decay_mult=0.),
             'relative_position_bias_table': dict(decay_mult=0.),
             'norm': dict(decay_mult=0.)
-        }))
+        }),
+    clip_grad=dict(max_norm=1, norm_type=2))

-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
 # learning policy
 param_scheduler = [
     dict(
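Note on _delete_=True, which this hunk adds to an optim_wrapper that otherwise keeps its keys: in OpenMMLab config inheritance, a child dict is merged key-by-key into the dict inherited from _base_; _delete_=True makes it replace the inherited dict wholesale, so no stale base keys survive. A minimal illustration (the base file name and its contents are invented for this sketch):

# base.py (hypothetical)
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=optimizer,
    constructor='LayerDecayOptimizerConstructor')

# child.py: plain merging would silently keep `constructor` from base;
# _delete_=True replaces the whole dict instead.
_base_ = ['./base.py']
optim_wrapper = dict(
    _delete_=True,
    type='AmpOptimWrapper',
    optimizer=optimizer,
    loss_scale='dynamic')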
@@ -1,5 +1,7 @@
 _base_ = './pspnet_r101-d8_512x1024_80k_cityscapes.py'
-# fp16 settings
-default_hooks = dict(optimizer=dict(type='Fp16OptimizerHook', loss_scale=512.))
-# fp16 placeholder
-fp16 = dict()
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=optimizer,
+    loss_scale=512.)
@@ -16,12 +16,11 @@ model = dict(
         strides=(1, 2, 2, 2)))

 optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(
@@ -14,12 +14,11 @@ model = dict(
             type='Pretrained', prefix='backbone.', checkpoint=checkpoint)))

 optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05)
-optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer)
-default_hooks = dict(
-    optimizer=dict(
-        _delete_=True,
-        type='OptimizerHook',
-        grad_clip=dict(max_norm=1, norm_type=2)))
+optim_wrapper = dict(
+    _delete_=True,
+    type='OptimWrapper',
+    optimizer=optimizer,
+    clip_grad=dict(max_norm=1, norm_type=2))
 # learning policy
 param_scheduler = [
     dict(