[Refactor]: Refactor scheduler and change scheduler to param_scheduler

pull/352/head
YuanLiuuuuuu 2022-06-06 03:06:32 +00:00 committed by fangyixiao18
parent 0f10a4debe
commit ded3dc9640
28 changed files with 218 additions and 151 deletions
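Across the files below the conversion follows the same pattern; as a rough reference (a summary of the diffs in this commit, not an exhaustive specification of the new API):

# old lr_config field                    -> new param_scheduler entry
# policy='CosineAnnealing', min_lr=...   -> dict(type='CosineAnnealingLR', T_max=..., eta_min=...)
# policy='step', step=[...]              -> dict(type='MultiStepLR', milestones=[...], gamma=...)
# warmup='linear', warmup_ratio=...      -> a leading dict(type='LinearLR', start_factor=...)
# warmup_iters (warmup_by_epoch=True)    -> the LinearLR entry's end (in epochs)
# by_epoch=False                         -> by_epoch=True plus convert_to_iter_based=True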

View File

@ -1,14 +1,17 @@
# optimizer
optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0.,
warmup='linear',
warmup_iters=5,
warmup_ratio=1e-4, # cannot be 0
warmup_by_epoch=True)
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
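
For reference, the two entries above reproduce the old warmup-plus-cosine curve: start_factor takes the old warmup_ratio, the LinearLR end takes warmup_iters (counted in epochs), and T_max=95 is max_epochs minus the warmup length. A framework-free sketch of the resulting per-epoch learning rate (function and argument names are illustrative, not part of this commit):

import math

def lr_at_epoch(epoch, base_lr=1e-3, warmup_end=5, max_epochs=100,
                start_factor=1e-4, eta_min=0.0):
    # Linear warmup, mirroring the LinearLR entry (begin=0, end=5).
    if epoch < warmup_end:
        return base_lr * (start_factor + (1 - start_factor) * epoch / warmup_end)
    # Cosine annealing over the remaining T_max = max_epochs - warmup_end epochs,
    # mirroring the CosineAnnealingLR entry (begin=5, end=100).
    t, t_max = epoch - warmup_end, max_epochs - warmup_end
    return eta_min + (base_lr - eta_min) * (1 + math.cos(math.pi * t / t_max)) / 2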

View File

@ -1,8 +1,10 @@
# optimizer
optimizer = dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.)
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0.)
# learning rate scheduler
param_scheduler = [
dict(type='CosineAnnealingLR', T_max=90, by_epoch=True, begin=0, end=90)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)

View File

@ -1,8 +1,10 @@
# optimizer
optimizer = dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6)
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0.)
# learning rate scheduler
param_scheduler = [
dict(type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)

View File

@ -1,8 +1,10 @@
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=1e-4)
# learning policy
lr_config = dict(policy='step', step=[60, 80])
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[60, 80], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
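
The step policy maps directly: step becomes milestones and the previously implicit decay factor is written out as gamma=0.1. A minimal sketch of the resulting schedule (pure Python, illustrative only):

def multistep_lr(epoch, base_lr=0.1, milestones=(60, 80), gamma=0.1):
    # lr is multiplied by gamma once for every milestone already passed
    return base_lr * gamma ** sum(epoch >= m for m in milestones)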

View File

@ -11,8 +11,10 @@ model = dict(head=dict(num_classes=10))
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=5e-4)
# learning policy
lr_config = dict(policy='step', step=[150, 250])
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[150, 250], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=350)

View File

@ -22,8 +22,10 @@ optimizer = dict(
weight_decay=1e-4,
paramwise_options={'\\Ahead.': dict(lr_mult=1)})
# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)

View File

@ -22,8 +22,10 @@ optimizer = dict(
weight_decay=5e-4,
paramwise_options={'\\Ahead.': dict(lr_mult=1)})
# learning policy
lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)

View File

@ -8,8 +8,11 @@ _base_ = [
# model settings
model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))
# learning policy
lr_config = dict(step=[30, 60, 90])
# learning rate scheduler
param_scheduler = [
dict(
type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)

View File

@ -41,8 +41,11 @@ optimizer = dict(
paramwise_options=dict(norm_decay_mult=0.),
nesterov=True)
# learning policy
lr_config = dict(policy='step', step=[30, 60, 90])
# learning rate scheduler
param_scheduler = [
dict(
type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)

View File

@ -83,15 +83,24 @@ optimizer = dict(
# clip gradient
optimizer_config = dict(grad_clip=dict(max_norm=5.0))
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=2.5e-7 * 2048 / 512,
warmup='linear',
warmup_iters=20,
warmup_ratio=2.5e-7 / 1.25e-3,
warmup_by_epoch=True,
by_epoch=False)
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=2.5e-7 / 1.25e-3,
by_epoch=True,
begin=0,
end=20,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=80,
eta_min=2.5e-7 * 2048 / 512,
by_epoch=True,
begin=20,
end=100,
convert_to_iter_based=True)
]
# mixed precision
fp16 = dict(loss_scale='dynamic')
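
The numeric values carry over unchanged here: start_factor keeps the old warmup_ratio, eta_min keeps the old min_lr, T_max is the length of the cosine stage, and convert_to_iter_based=True retains the per-iteration updates that by_epoch=False used to give. A quick check of the derived values (illustrative):

start_factor = 2.5e-7 / 1.25e-3      # = 2e-4, the old warmup_ratio
eta_min = 2.5e-7 * 2048 / 512        # = 1e-6, the old min_lr
t_max = 100 - 20                     # = 80, max_epochs minus the 20 warmup epochs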

View File

@ -77,15 +77,24 @@ optimizer = dict(
model_type='vit',
layer_decay=0.65)
# learning policy
lr_config = dict(
policy='StepFixCosineAnnealing',
min_lr=1e-6,
warmup='linear',
warmup_iters=5,
warmup_ratio=1e-4,
warmup_by_epoch=True,
by_epoch=False)
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=95,
eta_min=1e-6,
by_epoch=True,
begin=5,
end=100,
convert_to_iter_based=True)
]
# runtime
checkpoint_config = dict(interval=1, max_keep_ckpts=3, out_dir='')

View File

@ -21,8 +21,11 @@ optimizer = dict(
paramwise_options=dict(norm_decay_mult=0.),
nesterov=True)
# learning policy
lr_config = dict(policy='step', step=[24, 48, 72])
# learning rate scheduler
param_scheduler = [
dict(
type='MultiStepLR', by_epoch=True, milestones=[24, 48, 72], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=84)

View File

@ -21,8 +21,10 @@ optimizer = dict(
paramwise_options=dict(norm_decay_mult=0.),
nesterov=True)
# learning policy
lr_config = dict(policy='step', step=[7, 14, 21])
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[7, 14, 21], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=28)

View File

@ -2,22 +2,17 @@
optimizer = dict(type='AdamW', lr=1.5e-4, betas=(0.9, 0.95), weight_decay=0.05)
optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb
# learning policy
scheduler = [
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=False,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=260,
by_epoch=False,
begin=40,
end=300,
convert_to_iter_based=True)
type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
]
# runtime settings
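
Here the existing scheduler list is renamed to param_scheduler and flipped from by_epoch=False to by_epoch=True, so begin, end and T_max are interpreted in epochs; the cosine stage also drops convert_to_iter_based and therefore steps once per epoch. A rough PyTorch analogue of the same two-stage schedule, stepped per epoch (an illustration with torch.optim, not the MMEngine execution path):

import torch
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1.5e-4,
                              betas=(0.9, 0.95), weight_decay=0.05)

warmup = LinearLR(optimizer, start_factor=1e-4, total_iters=40)    # epochs 0-40
cosine = CosineAnnealingLR(optimizer, T_max=260)                   # epochs 40-300
scheduler = SequentialLR(optimizer, schedulers=[warmup, cosine], milestones=[40])

for epoch in range(300):
    # ... train one epoch ...
    scheduler.step()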

View File

@ -2,22 +2,17 @@
optimizer = dict(type='AdamW', lr=6e-4, weight_decay=0.1)
optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb
# learning policy
scheduler = [
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=False,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=260,
by_epoch=False,
begin=40,
end=300,
convert_to_iter_based=True)
type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
]
# runtime settings

View File

@ -2,22 +2,17 @@
optimizer = dict(type='LARS', lr=4.8, weight_decay=1e-6, momentum=0.9)
optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb
# learning policy
scheduler = [
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=False,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=190,
by_epoch=False,
begin=10,
end=200,
convert_to_iter_based=True)
type='CosineAnnealingLR', T_max=190, by_epoch=True, begin=10, end=200)
]
# runtime settings

View File

@ -2,15 +2,9 @@
optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb
# learning policy
scheduler = [
dict(
type='CosineAnnealingLR',
T_max=200,
by_epoch=False,
begin=0,
end=200,
convert_to_iter_based=True)
# learning rate scheduler
param_scheduler = [
dict(type='CosineAnnealingLR', T_max=200, by_epoch=True, begin=0, end=200)
]
# runtime settings

View File

@ -2,8 +2,8 @@
optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb
# learning policy
scheduler = [
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[120, 160], gamma=0.1)
]

View File

@ -23,15 +23,23 @@ optimizer = dict(
dict(weight_decay=0, lr_mult=0.024, lars_exclude=True),
})
# learning policy
lr_config = dict(
policy='CosineAnnealing',
by_epoch=False,
min_lr=0.0016,
warmup='linear',
warmup_iters=10,
warmup_ratio=1.6e-4, # cannot be 0
warmup_by_epoch=True)
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.6e-4,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=190,
eta_min=0.0016,
by_epoch=True,
begin=10,
end=200)
]
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs

View File

@ -18,15 +18,24 @@ optimizer = dict(
},
betas=(0.9, 0.999))
# learning policy
lr_config = dict(
policy='StepFixCosineAnnealing',
min_lr=1e-5,
warmup='linear',
warmup_iters=10,
warmup_ratio=1e-4,
warmup_by_epoch=True,
by_epoch=False)
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=290,
eta_min=1e-5,
by_epoch=True,
begin=10,
end=300,
convert_to_iter_based=True)
]
# schedule
runner = dict(max_epochs=300)

View File

@ -15,8 +15,10 @@ optimizer = dict(
weight_decay=1e-5,
paramwise_options={'\\Ahead.': dict(momentum=0.)})
# learning policy
lr_config = dict(policy='step', step=[400])
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.1)
]
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs

View File

@ -25,28 +25,19 @@ scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=False,
by_epoch=True,
begin=0,
end=40,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=360,
by_epoch=False,
by_epoch=True,
begin=40,
end=400,
convert_to_iter_based=True)
]
lr_config = dict(
policy='StepFixCosineAnnealing',
min_lr=0.0,
warmup='linear',
warmup_iters=40,
warmup_ratio=1e-4,
warmup_by_epoch=True,
by_epoch=False)
# schedule
runner = dict(max_epochs=400)

View File

@ -19,8 +19,10 @@ optimizer = dict(
weight_decay=1e-5,
paramwise_options={'\\Ahead.': dict(momentum=0.)})
# learning policy
lr_config = dict(policy='step', step=[400], gamma=0.4)
# learning rate scheduler
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=440)

View File

@ -16,14 +16,17 @@ optimizer = dict(
'\\Ahead.': dict(weight_decay=5e-4)
})
# learning policy
lr_config = dict(
policy='step',
step=[30, 50],
warmup='linear',
warmup_iters=5, # 5 ep
warmup_ratio=0.1,
warmup_by_epoch=True)
# learning rate scheduler
scheduler = [
dict(
type='LinearLR',
start_factor=0.1,
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=70)

View File

@ -8,14 +8,17 @@ _base_ = [
# optimizer
optimizer = dict(type='SGD', lr=0.2, momentum=0.9, weight_decay=1e-4)
# learning policy
lr_config = dict(
policy='step',
step=[30, 50],
warmup='linear',
warmup_iters=5, # 5 ep
warmup_ratio=0.1,
warmup_by_epoch=True)
# learning rate scheduler
scheduler = [
dict(
type='LinearLR',
start_factor=0.1,
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
]
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=70)
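
This recipe pairs a 5-epoch linear warmup with step decay at epochs 30 and 50. A framework-free sketch of the combined per-epoch learning rate, using the lr=0.2 from above (names are illustrative):

def lr_at_epoch(epoch, base_lr=0.2, start_factor=0.1, warmup_end=5,
                milestones=(30, 50), gamma=0.1):
    # Linear warmup (LinearLR, begin=0, end=5), then step decay (MultiStepLR).
    if epoch < warmup_end:
        return base_lr * (start_factor + (1 - start_factor) * epoch / warmup_end)
    return base_lr * gamma ** sum(epoch >= m for m in milestones)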

View File

@ -17,14 +17,23 @@ optimizer = dict(
'bias': dict(weight_decay=0., lars_exclude=True)
})
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0.,
warmup='linear',
warmup_iters=10,
warmup_ratio=1e-4,
warmup_by_epoch=True)
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=190,
eta_min=0.,
by_epoch=True,
begin=10,
end=200)
]
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs

View File

@ -23,15 +23,24 @@ optimizer = dict(
# clip gradient
optimizer_config = dict(grad_clip=dict(max_norm=5.0))
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=1e-5 * 2048 / 512,
warmup='linear',
warmup_iters=10,
warmup_ratio=1e-6 / 2e-4,
warmup_by_epoch=True,
by_epoch=False)
# learning rate scheduler
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-6 / 2e-4,
by_epoch=True,
begin=0,
end=10,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=90,
eta_min=1e-5 * 2048 / 512,
by_epoch=True,
begin=10,
end=100,
convert_to_iter_based=True)
]
# mixed precision
fp16 = dict(loss_scale='dynamic')

View File

@ -28,7 +28,15 @@ optimizer = dict(type='LARS', lr=0.6)
optimizer_config = dict(frozen_layers_cfg=dict(prototypes=5005))
# learning policy
lr_config = dict(_delete_=True, policy='CosineAnnealing', min_lr=6e-4)
param_scheduler = [
dict(
type='CosineAnnealingLR',
T_max=200,
eta_min=6e-4,
by_epoch=True,
begin=0,
end=200)
]
# runtime settings
# the max_keep_ckpts controls the max number of ckpt file in your work_dirs