[Refactor]: Refactor scheduler and change scheduler to param_scheduler
parent 0f10a4debe
commit ded3dc9640
@@ -1,14 +1,17 @@
 # optimizer
 optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05)
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=0.,
-    warmup='linear',
-    warmup_iters=5,
-    warmup_ratio=1e-4,  # cannot be 0
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)

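The hunk above shows the pattern this commit applies throughout: the warmup fields of the old lr_config become an explicit LinearLR entry, and the annealing policy becomes a CosineAnnealingLR (or MultiStepLR) entry covering the remaining epochs. A minimal sketch of the correspondence, with illustrative variable names that are not part of the diff:

# How the old warmup options map onto the new scheduler list (illustrative).
max_epochs = 100
warmup_epochs = 5     # old: warmup_iters=5 with warmup_by_epoch=True
warmup_ratio = 1e-4   # old: warmup_ratio; becomes start_factor below

param_scheduler = [
    # Warmup: LR ramps linearly from warmup_ratio * lr up to lr over the
    # first epochs; convert_to_iter_based=True steps it every iteration.
    dict(
        type='LinearLR',
        start_factor=warmup_ratio,
        by_epoch=True,
        begin=0,
        end=warmup_epochs,
        convert_to_iter_based=True),
    # Main schedule: cosine decay over the remaining epochs, so
    # T_max = max_epochs - warmup_epochs (95 here) and end = max_epochs.
    dict(
        type='CosineAnnealingLR',
        T_max=max_epochs - warmup_epochs,
        by_epoch=True,
        begin=warmup_epochs,
        end=max_epochs)
]
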
@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.)
 
-# learning policy
-lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=90, by_epoch=True, begin=0, end=90)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)

@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6)
 
-# learning policy
-lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)

@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=1e-4)
 
-# learning policy
-lr_config = dict(policy='step', step=[60, 80])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[60, 80], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)

@@ -11,8 +11,10 @@ model = dict(head=dict(num_classes=10))
 # optimizer
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=5e-4)
 
-# learning policy
-lr_config = dict(policy='step', step=[150, 250])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[150, 250], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=350)

@@ -22,8 +22,10 @@ optimizer = dict(
     weight_decay=1e-4,
     paramwise_options={'\\Ahead.': dict(lr_mult=1)})
 
-# learning policy
-lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=20)

@@ -22,8 +22,10 @@ optimizer = dict(
     weight_decay=5e-4,
     paramwise_options={'\\Ahead.': dict(lr_mult=1)})
 
-# learning policy
-lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=20)

@@ -8,8 +8,11 @@ _base_ = [
 # model settings
 model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))
 
-# learning policy
-lr_config = dict(step=[30, 60, 90])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)

@@ -41,8 +41,11 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[30, 60, 90])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)

@@ -83,15 +83,24 @@ optimizer = dict(
 # clip gradient
 optimizer_config = dict(grad_clip=dict(max_norm=5.0))
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=2.5e-7 * 2048 / 512,
-    warmup='linear',
-    warmup_iters=20,
-    warmup_ratio=2.5e-7 / 1.25e-3,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=2.5e-7 / 1.25e-3,
+        by_epoch=True,
+        begin=0,
+        end=20,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=80,
+        eta_min=2.5e-7 * 2048 / 512,
+        by_epoch=True,
+        begin=20,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # mixed precision
 fp16 = dict(loss_scale='dynamic')

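In hunks like the one above, the old min_lr value carries over as eta_min, and T_max counts only the post-warmup span. A quick check, assuming a 100-epoch run with a 20-epoch warmup as configured above:

warmup_epochs, max_epochs = 20, 100
T_max = max_epochs - warmup_epochs  # 80, matching begin=20, end=100
eta_min = 2.5e-7 * 2048 / 512       # carried over unchanged from min_lr
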
@@ -77,15 +77,24 @@ optimizer = dict(
     model_type='vit',
     layer_decay=0.65)
 
-# learning policy
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=1e-6,
-    warmup='linear',
-    warmup_iters=5,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=95,
+        eta_min=1e-6,
+        by_epoch=True,
+        begin=5,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # runtime
 checkpoint_config = dict(interval=1, max_keep_ckpts=3, out_dir='')

@@ -21,8 +21,11 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[24, 48, 72])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[24, 48, 72], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=84)

@@ -21,8 +21,10 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[7, 14, 21])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[7, 14, 21], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=28)

@@ -2,22 +2,17 @@
 optimizer = dict(type='AdamW', lr=1.5e-4, betas=(0.9, 0.95), weight_decay=0.05)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=260,
-        by_epoch=False,
-        begin=40,
-        end=300,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
 ]
 
 # runtime settings

@@ -2,22 +2,17 @@
 optimizer = dict(type='AdamW', lr=6e-4, weight_decay=0.1)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=260,
-        by_epoch=False,
-        begin=40,
-        end=300,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
 ]
 
 # runtime settings

@@ -2,22 +2,17 @@
 optimizer = dict(type='LARS', lr=4.8, weight_decay=1e-6, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=10,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=190,
-        by_epoch=False,
-        begin=10,
-        end=200,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=190, by_epoch=True, begin=10, end=200)
 ]
 
 # runtime settings

@@ -2,15 +2,9 @@
 optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
-    dict(
-        type='CosineAnnealingLR',
-        T_max=200,
-        by_epoch=False,
-        begin=0,
-        end=200,
-        convert_to_iter_based=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=200, by_epoch=True, begin=0, end=200)
 ]
 
 # runtime settings

@@ -2,8 +2,8 @@
 optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(type='MultiStepLR', by_epoch=True, milestones=[120, 160], gamma=0.1)
 ]
 

@@ -23,15 +23,23 @@ optimizer = dict(
         dict(weight_decay=0, lr_mult=0.024, lars_exclude=True),
     })
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    by_epoch=False,
-    min_lr=0.0016,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1.6e-4,  # cannot be 0
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1.6e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=190,
+        eta_min=0.0016,
+        by_epoch=True,
+        begin=10,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs

@@ -18,15 +18,24 @@ optimizer = dict(
     },
     betas=(0.9, 0.999))
 
-# learning policy
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=1e-5,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=290,
+        eta_min=1e-5,
+        by_epoch=True,
+        begin=10,
+        end=300,
+        convert_to_iter_based=True)
+]
 
 # schedule
 runner = dict(max_epochs=300)

@@ -15,8 +15,10 @@ optimizer = dict(
     weight_decay=1e-5,
     paramwise_options={'\\Ahead.': dict(momentum=0.)})
 
-# learning policy
-lr_config = dict(policy='step', step=[400])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.1)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs

@@ -25,28 +25,19 @@ scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
         type='CosineAnnealingLR',
         T_max=360,
-        by_epoch=False,
+        by_epoch=True,
         begin=40,
         end=400,
         convert_to_iter_based=True)
 ]
 
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=0.0,
-    warmup='linear',
-    warmup_iters=40,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
-
 # schedule
 runner = dict(max_epochs=400)

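For configs that already used the list-style scheduler, the hunk above only flips by_epoch=False to by_epoch=True and removes the leftover lr_config. A hedged reading of the flag combination, annotated with my own comments rather than anything from the diff: with by_epoch=True the window is declared in epochs, and convert_to_iter_based=True converts it at build time so the value is still updated every iteration.

dict(
    type='CosineAnnealingLR',
    T_max=360,                   # epochs 40..400, no longer raw iteration counts
    by_epoch=True,               # begin/end/T_max are measured in epochs
    begin=40,
    end=400,
    convert_to_iter_based=True)  # still stepped per iteration at runtime
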
@@ -19,8 +19,10 @@ optimizer = dict(
     weight_decay=1e-5,
     paramwise_options={'\\Ahead.': dict(momentum=0.)})
 
-# learning policy
-lr_config = dict(policy='step', step=[400], gamma=0.4)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=440)

@@ -16,14 +16,17 @@ optimizer = dict(
         '\\Ahead.': dict(weight_decay=5e-4)
     })
 
-# learning policy
-lr_config = dict(
-    policy='step',
-    step=[30, 50],
-    warmup='linear',
-    warmup_iters=5,  # 5 ep
-    warmup_ratio=0.1,
-    warmup_by_epoch=True)
+# learning rate scheduler
+scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=70)

@@ -8,14 +8,17 @@ _base_ = [
 # optimizer
 optimizer = dict(type='SGD', lr=0.2, momentum=0.9, weight_decay=1e-4)
 
-# learning policy
-lr_config = dict(
-    policy='step',
-    step=[30, 50],
-    warmup='linear',
-    warmup_iters=5,  # 5 ep
-    warmup_ratio=0.1,
-    warmup_by_epoch=True)
+# learning rate scheduler
+scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=70)

@@ -17,14 +17,23 @@ optimizer = dict(
         'bias': dict(weight_decay=0., lars_exclude=True)
     })
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=0.,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=190,
+        eta_min=0.,
+        by_epoch=True,
+        begin=10,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs

@@ -23,15 +23,24 @@ optimizer = dict(
 # clip gradient
 optimizer_config = dict(grad_clip=dict(max_norm=5.0))
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=1e-5 * 2048 / 512,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-6 / 2e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-6 / 2e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=90,
+        eta_min=1e-5 * 2048 / 512,
+        by_epoch=True,
+        begin=10,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # mixed precision
 fp16 = dict(loss_scale='dynamic')

@@ -28,7 +28,15 @@ optimizer = dict(type='LARS', lr=0.6)
 optimizer_config = dict(frozen_layers_cfg=dict(prototypes=5005))
 
 # learning policy
-lr_config = dict(_delete_=True, policy='CosineAnnealing', min_lr=6e-4)
+param_scheduler = [
+    dict(
+        type='CosineAnnealingLR',
+        T_max=200,
+        eta_min=6e-4,
+        by_epoch=True,
+        begin=0,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs