[Refactor]: Refactor scheduler and change scheduler to param_scheduler
parent 0f10a4debe
commit ded3dc9640
@@ -1,14 +1,17 @@
 # optimizer
 optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05)
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=0.,
-    warmup='linear',
-    warmup_iters=5,
-    warmup_ratio=1e-4,  # cannot be 0
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)

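The hunk above shows the pattern this commit applies throughout: the warmup fields of the old lr_config become an explicit LinearLR entry, and the annealing policy becomes a CosineAnnealingLR (or MultiStepLR) entry covering the remaining epochs. A minimal sketch of the correspondence, with illustrative variable names that are not part of the diff:

# How the old warmup options map onto the new scheduler list (illustrative).
max_epochs = 100
warmup_epochs = 5     # old: warmup_iters=5 with warmup_by_epoch=True
warmup_ratio = 1e-4   # old: warmup_ratio; becomes start_factor below

param_scheduler = [
    # Warmup: LR ramps linearly from warmup_ratio * lr up to lr over the
    # first epochs; convert_to_iter_based=True steps it every iteration.
    dict(
        type='LinearLR',
        start_factor=warmup_ratio,
        by_epoch=True,
        begin=0,
        end=warmup_epochs,
        convert_to_iter_based=True),
    # Main schedule: cosine decay over the remaining epochs, so
    # T_max = max_epochs - warmup_epochs (95 here) and end = max_epochs.
    dict(
        type='CosineAnnealingLR',
        T_max=max_epochs - warmup_epochs,
        by_epoch=True,
        begin=warmup_epochs,
        end=max_epochs)
]
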
@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.)
 
-# learning policy
-lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=90, by_epoch=True, begin=0, end=90)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)

@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6)
 
-# learning policy
-lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)

@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=1e-4)
 
-# learning policy
-lr_config = dict(policy='step', step=[60, 80])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[60, 80], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)

@@ -11,8 +11,10 @@ model = dict(head=dict(num_classes=10))
 # optimizer
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=5e-4)
 
-# learning policy
-lr_config = dict(policy='step', step=[150, 250])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[150, 250], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=350)

@@ -22,8 +22,10 @@ optimizer = dict(
     weight_decay=1e-4,
     paramwise_options={'\\Ahead.': dict(lr_mult=1)})
 
-# learning policy
-lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=20)

@@ -22,8 +22,10 @@ optimizer = dict(
     weight_decay=5e-4,
     paramwise_options={'\\Ahead.': dict(lr_mult=1)})
 
-# learning policy
-lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=20)

@@ -8,8 +8,11 @@ _base_ = [
 # model settings
 model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))
 
-# learning policy
-lr_config = dict(step=[30, 60, 90])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)

@@ -41,8 +41,11 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[30, 60, 90])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)

@@ -83,15 +83,24 @@ optimizer = dict(
 # clip gradient
 optimizer_config = dict(grad_clip=dict(max_norm=5.0))
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=2.5e-7 * 2048 / 512,
-    warmup='linear',
-    warmup_iters=20,
-    warmup_ratio=2.5e-7 / 1.25e-3,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=2.5e-7 / 1.25e-3,
+        by_epoch=True,
+        begin=0,
+        end=20,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=80,
+        eta_min=2.5e-7 * 2048 / 512,
+        by_epoch=True,
+        begin=20,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # mixed precision
 fp16 = dict(loss_scale='dynamic')

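In hunks like the one above, the old min_lr value carries over as eta_min, and T_max counts only the post-warmup span. A quick check, assuming a 100-epoch run with a 20-epoch warmup as configured above:

warmup_epochs, max_epochs = 20, 100
T_max = max_epochs - warmup_epochs  # 80, matching begin=20, end=100
eta_min = 2.5e-7 * 2048 / 512       # carried over unchanged from min_lr
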
@@ -77,15 +77,24 @@ optimizer = dict(
     model_type='vit',
     layer_decay=0.65)
 
-# learning policy
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=1e-6,
-    warmup='linear',
-    warmup_iters=5,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=95,
+        eta_min=1e-6,
+        by_epoch=True,
+        begin=5,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # runtime
 checkpoint_config = dict(interval=1, max_keep_ckpts=3, out_dir='')

@@ -21,8 +21,11 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[24, 48, 72])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[24, 48, 72], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=84)

@@ -21,8 +21,10 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[7, 14, 21])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[7, 14, 21], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=28)

@@ -2,22 +2,17 @@
 optimizer = dict(type='AdamW', lr=1.5e-4, betas=(0.9, 0.95), weight_decay=0.05)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=260,
-        by_epoch=False,
-        begin=40,
-        end=300,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
 ]
 
 # runtime settings

@@ -2,22 +2,17 @@
 optimizer = dict(type='AdamW', lr=6e-4, weight_decay=0.1)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=260,
-        by_epoch=False,
-        begin=40,
-        end=300,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
 ]
 
 # runtime settings

@@ -2,22 +2,17 @@
 optimizer = dict(type='LARS', lr=4.8, weight_decay=1e-6, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=10,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=190,
-        by_epoch=False,
-        begin=10,
-        end=200,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=190, by_epoch=True, begin=10, end=200)
 ]
 
 # runtime settings

@@ -2,15 +2,9 @@
 optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
-    dict(
-        type='CosineAnnealingLR',
-        T_max=200,
-        by_epoch=False,
-        begin=0,
-        end=200,
-        convert_to_iter_based=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=200, by_epoch=True, begin=0, end=200)
 ]
 
 # runtime settings

@@ -2,8 +2,8 @@
 optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(type='MultiStepLR', by_epoch=True, milestones=[120, 160], gamma=0.1)
 ]
 

@@ -23,15 +23,23 @@ optimizer = dict(
         dict(weight_decay=0, lr_mult=0.024, lars_exclude=True),
     })
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    by_epoch=False,
-    min_lr=0.0016,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1.6e-4,  # cannot be 0
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1.6e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=190,
+        eta_min=0.0016,
+        by_epoch=True,
+        begin=10,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs

@@ -18,15 +18,24 @@ optimizer = dict(
     },
     betas=(0.9, 0.999))
 
-# learning policy
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=1e-5,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=290,
+        eta_min=1e-5,
+        by_epoch=True,
+        begin=10,
+        end=300,
+        convert_to_iter_based=True)
+]
 
 # schedule
 runner = dict(max_epochs=300)

@@ -15,8 +15,10 @@ optimizer = dict(
     weight_decay=1e-5,
     paramwise_options={'\\Ahead.': dict(momentum=0.)})
 
-# learning policy
-lr_config = dict(policy='step', step=[400])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.1)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs

@@ -25,28 +25,19 @@ scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
         type='CosineAnnealingLR',
         T_max=360,
-        by_epoch=False,
+        by_epoch=True,
         begin=40,
         end=400,
         convert_to_iter_based=True)
 ]
 
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=0.0,
-    warmup='linear',
-    warmup_iters=40,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
-
 # schedule
 runner = dict(max_epochs=400)

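For configs that already used the list-style scheduler, the hunk above only flips by_epoch=False to by_epoch=True and removes the leftover lr_config. A hedged reading of the flag combination, annotated with my own comments rather than anything from the diff: with by_epoch=True the window is declared in epochs, and convert_to_iter_based=True converts it at build time so the value is still updated every iteration.

dict(
    type='CosineAnnealingLR',
    T_max=360,                   # epochs 40..400, no longer raw iteration counts
    by_epoch=True,               # begin/end/T_max are measured in epochs
    begin=40,
    end=400,
    convert_to_iter_based=True)  # still stepped per iteration at runtime
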
@@ -19,8 +19,10 @@ optimizer = dict(
     weight_decay=1e-5,
     paramwise_options={'\\Ahead.': dict(momentum=0.)})
 
-# learning policy
-lr_config = dict(policy='step', step=[400], gamma=0.4)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=440)

@@ -16,14 +16,17 @@ optimizer = dict(
         '\\Ahead.': dict(weight_decay=5e-4)
     })
 
-# learning policy
-lr_config = dict(
-    policy='step',
-    step=[30, 50],
-    warmup='linear',
-    warmup_iters=5,  # 5 ep
-    warmup_ratio=0.1,
-    warmup_by_epoch=True)
+# learning rate scheduler
+scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=70)

@@ -8,14 +8,17 @@ _base_ = [
 # optimizer
 optimizer = dict(type='SGD', lr=0.2, momentum=0.9, weight_decay=1e-4)
 
-# learning policy
-lr_config = dict(
-    policy='step',
-    step=[30, 50],
-    warmup='linear',
-    warmup_iters=5,  # 5 ep
-    warmup_ratio=0.1,
-    warmup_by_epoch=True)
+# learning rate scheduler
+scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=70)

@@ -17,14 +17,23 @@ optimizer = dict(
         'bias': dict(weight_decay=0., lars_exclude=True)
     })
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=0.,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=190,
+        eta_min=0.,
+        by_epoch=True,
+        begin=10,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs

@@ -23,15 +23,24 @@ optimizer = dict(
 # clip gradient
 optimizer_config = dict(grad_clip=dict(max_norm=5.0))
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=1e-5 * 2048 / 512,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-6 / 2e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-6 / 2e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=90,
+        eta_min=1e-5 * 2048 / 512,
+        by_epoch=True,
+        begin=10,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # mixed precision
 fp16 = dict(loss_scale='dynamic')

@@ -28,7 +28,15 @@ optimizer = dict(type='LARS', lr=0.6)
 optimizer_config = dict(frozen_layers_cfg=dict(prototypes=5005))
 
 # learning policy
-lr_config = dict(_delete_=True, policy='CosineAnnealing', min_lr=6e-4)
+param_scheduler = [
+    dict(
+        type='CosineAnnealingLR',
+        T_max=200,
+        eta_min=6e-4,
+        by_epoch=True,
+        begin=0,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs