diff --git a/configs/benchmarks/classification/_base_/schedules/adamw_coslr-100e_in1k.py b/configs/benchmarks/classification/_base_/schedules/adamw_coslr-100e_in1k.py
index ab979139..0f601657 100644
--- a/configs/benchmarks/classification/_base_/schedules/adamw_coslr-100e_in1k.py
+++ b/configs/benchmarks/classification/_base_/schedules/adamw_coslr-100e_in1k.py
@@ -1,14 +1,17 @@
 # optimizer
 optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05)
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=0.,
-    warmup='linear',
-    warmup_iters=5,
-    warmup_ratio=1e-4,  # cannot be 0
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)
diff --git a/configs/benchmarks/classification/_base_/schedules/lars_coslr-90e.py b/configs/benchmarks/classification/_base_/schedules/lars_coslr-90e.py
index 4dfe75fd..c3999b96 100644
--- a/configs/benchmarks/classification/_base_/schedules/lars_coslr-90e.py
+++ b/configs/benchmarks/classification/_base_/schedules/lars_coslr-90e.py
@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='LARS', lr=1.6, momentum=0.9, weight_decay=0.)
 
-# learning policy
-lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=90, by_epoch=True, begin=0, end=90)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)
diff --git a/configs/benchmarks/classification/_base_/schedules/sgd_coslr-100e.py b/configs/benchmarks/classification/_base_/schedules/sgd_coslr-100e.py
index 8a201173..43cf92b0 100644
--- a/configs/benchmarks/classification/_base_/schedules/sgd_coslr-100e.py
+++ b/configs/benchmarks/classification/_base_/schedules/sgd_coslr-100e.py
@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.3, momentum=0.9, weight_decay=1e-6)
 
-# learning policy
-lr_config = dict(policy='CosineAnnealing', min_lr=0.)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)
diff --git a/configs/benchmarks/classification/_base_/schedules/sgd_steplr-100e.py b/configs/benchmarks/classification/_base_/schedules/sgd_steplr-100e.py
index 3d58c8f9..b9b04594 100644
--- a/configs/benchmarks/classification/_base_/schedules/sgd_steplr-100e.py
+++ b/configs/benchmarks/classification/_base_/schedules/sgd_steplr-100e.py
@@ -1,8 +1,10 @@
 # optimizer
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=1e-4)
 
-# learning policy
-lr_config = dict(policy='step', step=[60, 80])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[60, 80], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=100)
diff --git a/configs/benchmarks/classification/cifar/resnet50_8xb128-steplr-350e_cifar10.py b/configs/benchmarks/classification/cifar/resnet50_8xb128-steplr-350e_cifar10.py
index fc396b29..c4294e48 100644
--- a/configs/benchmarks/classification/cifar/resnet50_8xb128-steplr-350e_cifar10.py
+++ b/configs/benchmarks/classification/cifar/resnet50_8xb128-steplr-350e_cifar10.py
@@ -11,8 +11,10 @@ model = dict(head=dict(num_classes=10))
 # optimizer
 optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=5e-4)
 
-# learning policy
-lr_config = dict(policy='step', step=[150, 250])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[150, 250], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=350)
diff --git a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py
index 95d19a4f..87d43f73 100644
--- a/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_10percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-10pct.py
@@ -22,8 +22,10 @@ optimizer = dict(
     weight_decay=1e-4,
     paramwise_options={'\\Ahead.': dict(lr_mult=1)})
 
-# learning policy
-lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=20)
diff --git a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py
index 066c65d2..d2894e0f 100644
--- a/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py
+++ b/configs/benchmarks/classification/imagenet/imagenet_1percent/resnet50_head1_4xb64-steplr1e-1-20e_in1k-1pct.py
@@ -22,8 +22,10 @@ optimizer = dict(
     weight_decay=5e-4,
     paramwise_options={'\\Ahead.': dict(lr_mult=1)})
 
-# learning policy
-lr_config = dict(policy='step', step=[12, 16], gamma=0.2)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[12, 16], gamma=0.2)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=20)
diff --git a/configs/benchmarks/classification/imagenet/resnet50_8xb32-steplr-90e_in1k.py b/configs/benchmarks/classification/imagenet/resnet50_8xb32-steplr-90e_in1k.py
index e837fb5f..9d084a79 100644
--- a/configs/benchmarks/classification/imagenet/resnet50_8xb32-steplr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50_8xb32-steplr-90e_in1k.py
@@ -8,8 +8,11 @@ _base_ = [
 # model settings
 model = dict(backbone=dict(norm_cfg=dict(type='SyncBN')))
 
-# learning policy
-lr_config = dict(step=[30, 60, 90])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)
diff --git a/configs/benchmarks/classification/imagenet/resnet50_mhead_linear-8xb32-steplr-90e_in1k.py b/configs/benchmarks/classification/imagenet/resnet50_mhead_linear-8xb32-steplr-90e_in1k.py
index c569c7fd..99553a89 100644
--- a/configs/benchmarks/classification/imagenet/resnet50_mhead_linear-8xb32-steplr-90e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/resnet50_mhead_linear-8xb32-steplr-90e_in1k.py
@@ -41,8 +41,11 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[30, 60, 90])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=90)
diff --git a/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py b/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py
index 3140ce85..ec7e0076 100644
--- a/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/swin-base_ft-8xb256-coslr-100e_in1k.py
@@ -83,15 +83,24 @@ optimizer = dict(
 # clip gradient
 optimizer_config = dict(grad_clip=dict(max_norm=5.0))
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=2.5e-7 * 2048 / 512,
-    warmup='linear',
-    warmup_iters=20,
-    warmup_ratio=2.5e-7 / 1.25e-3,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=2.5e-7 / 1.25e-3,
+        by_epoch=True,
+        begin=0,
+        end=20,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=80,
+        eta_min=2.5e-7 * 2048 / 512,
+        by_epoch=True,
+        begin=20,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # mixed precision
 fp16 = dict(loss_scale='dynamic')
diff --git a/configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py b/configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py
index 45dc96fc..4a221c7f 100644
--- a/configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py
+++ b/configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py
@@ -77,15 +77,24 @@ optimizer = dict(
     model_type='vit',
     layer_decay=0.65)
 
-# learning policy
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=1e-6,
-    warmup='linear',
-    warmup_iters=5,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=95,
+        eta_min=1e-6,
+        by_epoch=True,
+        begin=5,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # runtime
 checkpoint_config = dict(interval=1, max_keep_ckpts=3, out_dir='')
diff --git a/configs/benchmarks/classification/inaturalist2018/resnet50_mhead_8xb32-steplr-84e_inat18.py b/configs/benchmarks/classification/inaturalist2018/resnet50_mhead_8xb32-steplr-84e_inat18.py
index 1dfeee21..7e59482d 100644
--- a/configs/benchmarks/classification/inaturalist2018/resnet50_mhead_8xb32-steplr-84e_inat18.py
+++ b/configs/benchmarks/classification/inaturalist2018/resnet50_mhead_8xb32-steplr-84e_inat18.py
@@ -21,8 +21,11 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[24, 48, 72])
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='MultiStepLR', by_epoch=True, milestones=[24, 48, 72], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=84)
diff --git a/configs/benchmarks/classification/places205/resnet50_mhead_8xb32-steplr-28e_places205.py b/configs/benchmarks/classification/places205/resnet50_mhead_8xb32-steplr-28e_places205.py
index 74d30605..153b805a 100644
--- a/configs/benchmarks/classification/places205/resnet50_mhead_8xb32-steplr-28e_places205.py
+++ b/configs/benchmarks/classification/places205/resnet50_mhead_8xb32-steplr-28e_places205.py
@@ -21,8 +21,10 @@ optimizer = dict(
     paramwise_options=dict(norm_decay_mult=0.),
     nesterov=True)
 
-# learning policy
-lr_config = dict(policy='step', step=[7, 14, 21])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[7, 14, 21], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=28)
diff --git a/configs/selfsup/_base_/schedules/adamw_coslr-200e_in1k.py b/configs/selfsup/_base_/schedules/adamw_coslr-200e_in1k.py
index 15fe2a3a..d964c1ac 100644
--- a/configs/selfsup/_base_/schedules/adamw_coslr-200e_in1k.py
+++ b/configs/selfsup/_base_/schedules/adamw_coslr-200e_in1k.py
@@ -2,22 +2,17 @@
 optimizer = dict(type='AdamW', lr=1.5e-4, betas=(0.9, 0.95), weight_decay=0.05)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=260,
-        by_epoch=False,
-        begin=40,
-        end=300,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
 ]
 
 # runtime settings
diff --git a/configs/selfsup/_base_/schedules/adamw_coslr-300e_in1k.py b/configs/selfsup/_base_/schedules/adamw_coslr-300e_in1k.py
index a1307099..2e9e0d10 100644
--- a/configs/selfsup/_base_/schedules/adamw_coslr-300e_in1k.py
+++ b/configs/selfsup/_base_/schedules/adamw_coslr-300e_in1k.py
@@ -2,22 +2,17 @@
 optimizer = dict(type='AdamW', lr=6e-4, weight_decay=0.1)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=260,
-        by_epoch=False,
-        begin=40,
-        end=300,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=260, by_epoch=True, begin=40, end=300)
 ]
 
 # runtime settings
diff --git a/configs/selfsup/_base_/schedules/lars_coslr-200e_in1k.py b/configs/selfsup/_base_/schedules/lars_coslr-200e_in1k.py
index 74517b20..103daab7 100644
--- a/configs/selfsup/_base_/schedules/lars_coslr-200e_in1k.py
+++ b/configs/selfsup/_base_/schedules/lars_coslr-200e_in1k.py
@@ -2,22 +2,17 @@
 optimizer = dict(type='LARS', lr=4.8, weight_decay=1e-6, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=10,
         convert_to_iter_based=True),
     dict(
-        type='CosineAnnealingLR',
-        T_max=190,
-        by_epoch=False,
-        begin=10,
-        end=200,
-        convert_to_iter_based=True)
+        type='CosineAnnealingLR', T_max=190, by_epoch=True, begin=10, end=200)
 ]
 
 # runtime settings
diff --git a/configs/selfsup/_base_/schedules/sgd_coslr-200e_in1k.py b/configs/selfsup/_base_/schedules/sgd_coslr-200e_in1k.py
index 877448b9..c8a5f9da 100644
--- a/configs/selfsup/_base_/schedules/sgd_coslr-200e_in1k.py
+++ b/configs/selfsup/_base_/schedules/sgd_coslr-200e_in1k.py
@@ -2,15 +2,9 @@
 optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
-    dict(
-        type='CosineAnnealingLR',
-        T_max=200,
-        by_epoch=False,
-        begin=0,
-        end=200,
-        convert_to_iter_based=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='CosineAnnealingLR', T_max=200, by_epoch=True, begin=0, end=200)
 ]
 
 # runtime settings
diff --git a/configs/selfsup/_base_/schedules/sgd_steplr-200e_in1k.py b/configs/selfsup/_base_/schedules/sgd_steplr-200e_in1k.py
index e8cd5e40..892e5b84 100644
--- a/configs/selfsup/_base_/schedules/sgd_steplr-200e_in1k.py
+++ b/configs/selfsup/_base_/schedules/sgd_steplr-200e_in1k.py
@@ -2,8 +2,8 @@
 optimizer = dict(type='SGD', lr=0.03, weight_decay=1e-4, momentum=0.9)
 optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb
 
-# learning policy
-scheduler = [
+# learning rate scheduler
+param_scheduler = [
     dict(type='MultiStepLR', by_epoch=True, milestones=[120, 160], gamma=0.1)
 ]
 
diff --git a/configs/selfsup/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k.py b/configs/selfsup/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k.py
index e58821d4..cd24252a 100644
--- a/configs/selfsup/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k.py
+++ b/configs/selfsup/barlowtwins/barlowtwins_resnet50_8xb256-coslr-300e_in1k.py
@@ -23,15 +23,23 @@ optimizer = dict(
         dict(weight_decay=0, lr_mult=0.024, lars_exclude=True),
     })
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    by_epoch=False,
-    min_lr=0.0016,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1.6e-4,  # cannot be 0
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1.6e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=290,
+        eta_min=0.0016,
+        by_epoch=True,
+        begin=10,
+        end=300)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs
diff --git a/configs/selfsup/cae/cae_vit-base-p16_32xb64-fp16-coslr-300e_in1k.py b/configs/selfsup/cae/cae_vit-base-p16_32xb64-fp16-coslr-300e_in1k.py
index ab8b35c3..cc211cca 100644
--- a/configs/selfsup/cae/cae_vit-base-p16_32xb64-fp16-coslr-300e_in1k.py
+++ b/configs/selfsup/cae/cae_vit-base-p16_32xb64-fp16-coslr-300e_in1k.py
@@ -18,15 +18,24 @@ optimizer = dict(
     },
     betas=(0.9, 0.999))
 
-# learning policy
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=1e-5,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=290,
+        eta_min=1e-5,
+        by_epoch=True,
+        begin=10,
+        end=300,
+        convert_to_iter_based=True)
+]
 
 # schedule
 runner = dict(max_epochs=300)
diff --git a/configs/selfsup/deepcluster/deepcluster_resnet50_8xb64-steplr-200e_in1k.py b/configs/selfsup/deepcluster/deepcluster_resnet50_8xb64-steplr-200e_in1k.py
index ec6177ec..d7ad12d2 100644
--- a/configs/selfsup/deepcluster/deepcluster_resnet50_8xb64-steplr-200e_in1k.py
+++ b/configs/selfsup/deepcluster/deepcluster_resnet50_8xb64-steplr-200e_in1k.py
@@ -15,8 +15,10 @@ optimizer = dict(
     weight_decay=1e-5,
     paramwise_options={'\\Ahead.': dict(momentum=0.)})
 
-# learning policy
-lr_config = dict(policy='step', step=[400])
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.1)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs
diff --git a/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py b/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py
index 2ff40bbf..e37f9093 100644
--- a/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py
+++ b/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py
@@ -25,28 +25,19 @@ scheduler = [
     dict(
         type='LinearLR',
         start_factor=1e-4,
-        by_epoch=False,
+        by_epoch=True,
         begin=0,
         end=40,
         convert_to_iter_based=True),
     dict(
         type='CosineAnnealingLR',
         T_max=360,
-        by_epoch=False,
+        by_epoch=True,
         begin=40,
         end=400,
         convert_to_iter_based=True)
 ]
 
-lr_config = dict(
-    policy='StepFixCosineAnnealing',
-    min_lr=0.0,
-    warmup='linear',
-    warmup_iters=40,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
-
 # schedule
 runner = dict(max_epochs=400)
 
diff --git a/configs/selfsup/odc/odc_resnet50_8xb64-steplr-440e_in1k.py b/configs/selfsup/odc/odc_resnet50_8xb64-steplr-440e_in1k.py
index 6d8f4a55..4a262463 100644
--- a/configs/selfsup/odc/odc_resnet50_8xb64-steplr-440e_in1k.py
+++ b/configs/selfsup/odc/odc_resnet50_8xb64-steplr-440e_in1k.py
@@ -19,8 +19,10 @@ optimizer = dict(
     weight_decay=1e-5,
     paramwise_options={'\\Ahead.': dict(momentum=0.)})
 
-# learning policy
-lr_config = dict(policy='step', step=[400], gamma=0.4)
+# learning rate scheduler
+param_scheduler = [
+    dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=440)
diff --git a/configs/selfsup/relative_loc/relative-loc_resnet50_8xb64-steplr-70e_in1k.py b/configs/selfsup/relative_loc/relative-loc_resnet50_8xb64-steplr-70e_in1k.py
index 48922d6d..6ca795f5 100644
--- a/configs/selfsup/relative_loc/relative-loc_resnet50_8xb64-steplr-70e_in1k.py
+++ b/configs/selfsup/relative_loc/relative-loc_resnet50_8xb64-steplr-70e_in1k.py
@@ -16,14 +16,17 @@ optimizer = dict(
         '\\Ahead.': dict(weight_decay=5e-4)
     })
 
-# learning policy
-lr_config = dict(
-    policy='step',
-    step=[30, 50],
-    warmup='linear',
-    warmup_iters=5,  # 5 ep
-    warmup_ratio=0.1,
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=70)
diff --git a/configs/selfsup/rotation_pred/rotation-pred_resnet50_8xb16-steplr-70e_in1k.py b/configs/selfsup/rotation_pred/rotation-pred_resnet50_8xb16-steplr-70e_in1k.py
index be2c7d25..dbeb73e1 100644
--- a/configs/selfsup/rotation_pred/rotation-pred_resnet50_8xb16-steplr-70e_in1k.py
+++ b/configs/selfsup/rotation_pred/rotation-pred_resnet50_8xb16-steplr-70e_in1k.py
@@ -8,14 +8,17 @@ _base_ = [
 # optimizer
 optimizer = dict(type='SGD', lr=0.2, momentum=0.9, weight_decay=1e-4)
 
-# learning policy
-lr_config = dict(
-    policy='step',
-    step=[30, 50],
-    warmup='linear',
-    warmup_iters=5,  # 5 ep
-    warmup_ratio=0.1,
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=0.1,
+        by_epoch=True,
+        begin=0,
+        end=5,
+        convert_to_iter_based=True),
+    dict(type='MultiStepLR', by_epoch=True, milestones=[30, 50], gamma=0.1)
+]
 
 # runtime settings
 runner = dict(type='EpochBasedRunner', max_epochs=70)
diff --git a/configs/selfsup/simclr/simclr_resnet50_8xb32-coslr-200e_in1k.py b/configs/selfsup/simclr/simclr_resnet50_8xb32-coslr-200e_in1k.py
index 1b94aa86..8c1307b4 100644
--- a/configs/selfsup/simclr/simclr_resnet50_8xb32-coslr-200e_in1k.py
+++ b/configs/selfsup/simclr/simclr_resnet50_8xb32-coslr-200e_in1k.py
@@ -17,14 +17,23 @@ optimizer = dict(
         'bias': dict(weight_decay=0., lars_exclude=True)
     })
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=0.,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-4,
-    warmup_by_epoch=True)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=190,
+        eta_min=0.,
+        by_epoch=True,
+        begin=10,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs
diff --git a/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py b/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py
index 8e753ccf..c3e52ea9 100644
--- a/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py
+++ b/configs/selfsup/simmim/simmim_swin-base_16xb128-coslr-100e_in1k-192.py
@@ -23,15 +23,24 @@ optimizer = dict(
 # clip gradient
 optimizer_config = dict(grad_clip=dict(max_norm=5.0))
 
-# learning policy
-lr_config = dict(
-    policy='CosineAnnealing',
-    min_lr=1e-5 * 2048 / 512,
-    warmup='linear',
-    warmup_iters=10,
-    warmup_ratio=1e-6 / 2e-4,
-    warmup_by_epoch=True,
-    by_epoch=False)
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1e-6 / 2e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=90,
+        eta_min=1e-5 * 2048 / 512,
+        by_epoch=True,
+        begin=10,
+        end=100,
+        convert_to_iter_based=True)
+]
 
 # mixed precision
 fp16 = dict(loss_scale='dynamic')
diff --git a/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py b/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py
index dbd8901a..8c38bf50 100644
--- a/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py
+++ b/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py
@@ -28,7 +28,15 @@ optimizer = dict(type='LARS', lr=0.6)
 optimizer_config = dict(frozen_layers_cfg=dict(prototypes=5005))
 
 # learning policy
-lr_config = dict(_delete_=True, policy='CosineAnnealing', min_lr=6e-4)
+param_scheduler = [
+    dict(
+        type='CosineAnnealingLR',
+        T_max=200,
+        eta_min=6e-4,
+        by_epoch=True,
+        begin=0,
+        end=200)
+]
 
 # runtime settings
 # the max_keep_ckpts controls the max number of ckpt file in your work_dirs