diff --git a/configs/conformer/conformer-base-p16_8xb128_in1k.py b/configs/conformer/conformer-base-p16_8xb128_in1k.py index a44f56f3..00cac086 100644 --- a/configs/conformer/conformer-base-p16_8xb128_in1k.py +++ b/configs/conformer/conformer-base-p16_8xb128_in1k.py @@ -6,3 +6,8 @@ _base_ = [ ] train_dataloader = dict(batch_size=128) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/conformer/conformer-small-p16_8xb128_in1k.py b/configs/conformer/conformer-small-p16_8xb128_in1k.py index a937f4f9..d5d55d79 100644 --- a/configs/conformer/conformer-small-p16_8xb128_in1k.py +++ b/configs/conformer/conformer-small-p16_8xb128_in1k.py @@ -6,3 +6,8 @@ _base_ = [ ] train_dataloader = dict(batch_size=128) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/conformer/conformer-small-p32_8xb128_in1k.py b/configs/conformer/conformer-small-p32_8xb128_in1k.py index 0b07ce2c..dcd4bbd6 100644 --- a/configs/conformer/conformer-small-p32_8xb128_in1k.py +++ b/configs/conformer/conformer-small-p32_8xb128_in1k.py @@ -6,3 +6,8 @@ _base_ = [ ] train_dataloader = dict(batch_size=128) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/conformer/conformer-tiny-p16_8xb128_in1k.py b/configs/conformer/conformer-tiny-p16_8xb128_in1k.py index f88c6c3b..127a54dd 100644 --- a/configs/conformer/conformer-tiny-p16_8xb128_in1k.py +++ b/configs/conformer/conformer-tiny-p16_8xb128_in1k.py @@ -6,3 +6,8 @@ _base_ = [ ] train_dataloader = dict(batch_size=128) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/convmixer/convmixer-1024-20_10xb64_in1k.py b/configs/convmixer/convmixer-1024-20_10xb64_in1k.py index dc5be7c5..5408e1f6 100644 --- a/configs/convmixer/convmixer-1024-20_10xb64_in1k.py +++ b/configs/convmixer/convmixer-1024-20_10xb64_in1k.py @@ -32,3 +32,8 @@ param_scheduler = [ ] train_cfg = dict(by_epoch=True, max_epochs=150) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (10 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=640) diff --git a/configs/convmixer/convmixer-1536-20_10xb64_in1k.py b/configs/convmixer/convmixer-1536-20_10xb64_in1k.py index 1acb7be1..d011825c 100644 --- a/configs/convmixer/convmixer-1536-20_10xb64_in1k.py +++ b/configs/convmixer/convmixer-1536-20_10xb64_in1k.py @@ -32,3 +32,8 @@ param_scheduler = [ ] train_cfg = dict(by_epoch=True, max_epochs=150) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (10 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=640) diff --git a/configs/convmixer/convmixer-768-32_10xb64_in1k.py b/configs/convmixer/convmixer-768-32_10xb64_in1k.py index e3f14a61..8e092c00 100644 --- a/configs/convmixer/convmixer-768-32_10xb64_in1k.py +++ b/configs/convmixer/convmixer-768-32_10xb64_in1k.py @@ -12,3 +12,8 @@ optim_wrapper = dict( ) train_cfg = dict(by_epoch=True, max_epochs=300) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (10 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=640) diff --git a/configs/convnext/convnext-base_32xb128_in1k.py b/configs/convnext/convnext-base_32xb128_in1k.py index c8fa4ef7..e8fc11c6 100644 --- a/configs/convnext/convnext-base_32xb128_in1k.py +++ b/configs/convnext/convnext-base_32xb128_in1k.py @@ -16,3 +16,8 @@ optim_wrapper = dict( # runtime setting custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/convnext/convnext-large_64xb64_in1k.py b/configs/convnext/convnext-large_64xb64_in1k.py index 6edc3a58..4dda425a 100644 --- a/configs/convnext/convnext-large_64xb64_in1k.py +++ b/configs/convnext/convnext-large_64xb64_in1k.py @@ -16,3 +16,8 @@ optim_wrapper = dict( # runtime setting custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/convnext/convnext-small_32xb128_in1k.py b/configs/convnext/convnext-small_32xb128_in1k.py index 13304332..a7215bfa 100644 --- a/configs/convnext/convnext-small_32xb128_in1k.py +++ b/configs/convnext/convnext-small_32xb128_in1k.py @@ -16,3 +16,8 @@ optim_wrapper = dict( # runtime setting custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/convnext/convnext-tiny_32xb128_in1k.py b/configs/convnext/convnext-tiny_32xb128_in1k.py index 5c09a279..d46c88a6 100644 --- a/configs/convnext/convnext-tiny_32xb128_in1k.py +++ b/configs/convnext/convnext-tiny_32xb128_in1k.py @@ -16,3 +16,8 @@ optim_wrapper = dict( # runtime setting custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/convnext/convnext-xlarge_64xb64_in1k.py b/configs/convnext/convnext-xlarge_64xb64_in1k.py index e8f29739..da493332 100644 --- a/configs/convnext/convnext-xlarge_64xb64_in1k.py +++ b/configs/convnext/convnext-xlarge_64xb64_in1k.py @@ -16,3 +16,8 @@ optim_wrapper = dict( # runtime setting custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/cspnet/cspdarknet50_8xb32_in1k.py b/configs/cspnet/cspdarknet50_8xb32_in1k.py index 4edc2531..8688eea0 100644 --- a/configs/cspnet/cspdarknet50_8xb32_in1k.py +++ b/configs/cspnet/cspdarknet50_8xb32_in1k.py @@ -43,3 +43,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/cspnet/cspresnet50_8xb32_in1k.py b/configs/cspnet/cspresnet50_8xb32_in1k.py index b28c8fe6..8ba015c1 100644 --- a/configs/cspnet/cspresnet50_8xb32_in1k.py +++ b/configs/cspnet/cspresnet50_8xb32_in1k.py @@ -43,3 +43,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/cspnet/cspresnext50_8xb32_in1k.py b/configs/cspnet/cspresnext50_8xb32_in1k.py index 5885bd98..64092086 100644 --- a/configs/cspnet/cspresnext50_8xb32_in1k.py +++ b/configs/cspnet/cspresnext50_8xb32_in1k.py @@ -43,3 +43,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py b/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py index c8bdfb53..13a6df74 100644 --- a/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py +++ b/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py @@ -7,3 +7,8 @@ model = dict( # Change to the path of the pretrained model # init_cfg=dict(type='Pretrained', checkpoint=''), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/deit/deit-base-distilled_pt-16xb64_in1k.py b/configs/deit/deit-base-distilled_pt-16xb64_in1k.py index 039e53d9..818b41ca 100644 --- a/configs/deit/deit-base-distilled_pt-16xb64_in1k.py +++ b/configs/deit/deit-base-distilled_pt-16xb64_in1k.py @@ -8,3 +8,8 @@ model = dict( # dataset settings train_dataloader = dict(batch_size=64) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/deit/deit-base_ft-16xb32_in1k-384px.py b/configs/deit/deit-base_ft-16xb32_in1k-384px.py index 9f53db2b..951a054a 100644 --- a/configs/deit/deit-base_ft-16xb32_in1k-384px.py +++ b/configs/deit/deit-base_ft-16xb32_in1k-384px.py @@ -30,3 +30,8 @@ train_dataloader = dict(batch_size=32) # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (16 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/deit/deit-base_pt-16xb64_in1k.py b/configs/deit/deit-base_pt-16xb64_in1k.py index a4691a32..c7e30604 100644 --- a/configs/deit/deit-base_pt-16xb64_in1k.py +++ b/configs/deit/deit-base_pt-16xb64_in1k.py @@ -12,3 +12,8 @@ train_dataloader = dict(batch_size=64) # runtime settings custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/deit/deit-small-distilled_pt-4xb256_in1k.py b/configs/deit/deit-small-distilled_pt-4xb256_in1k.py index 3b1fac22..9415d6d3 100644 --- a/configs/deit/deit-small-distilled_pt-4xb256_in1k.py +++ b/configs/deit/deit-small-distilled_pt-4xb256_in1k.py @@ -5,3 +5,8 @@ model = dict( backbone=dict(type='DistilledVisionTransformer', arch='deit-small'), head=dict(type='DeiTClsHead', in_channels=384), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/deit/deit-small_pt-4xb256_in1k.py b/configs/deit/deit-small_pt-4xb256_in1k.py index e28d12f3..195e5d4e 100644 --- a/configs/deit/deit-small_pt-4xb256_in1k.py +++ b/configs/deit/deit-small_pt-4xb256_in1k.py @@ -46,3 +46,8 @@ optim_wrapper = dict( }), clip_grad=dict(max_norm=5.0), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py b/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py index 175f9804..b365deb7 100644 --- a/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py +++ b/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py @@ -5,3 +5,8 @@ model = dict( backbone=dict(type='DistilledVisionTransformer', arch='deit-tiny'), head=dict(type='DeiTClsHead', in_channels=192), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/deit/deit-tiny_pt-4xb256_in1k.py b/configs/deit/deit-tiny_pt-4xb256_in1k.py index 43df6e13..ffc5a01b 100644 --- a/configs/deit/deit-tiny_pt-4xb256_in1k.py +++ b/configs/deit/deit-tiny_pt-4xb256_in1k.py @@ -5,3 +5,8 @@ model = dict( backbone=dict(type='VisionTransformer', arch='deit-tiny'), head=dict(type='VisionTransformerClsHead', in_channels=192), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/densenet/densenet121_4xb256_in1k.py b/configs/densenet/densenet121_4xb256_in1k.py index dc03defb..a888869e 100644 --- a/configs/densenet/densenet121_4xb256_in1k.py +++ b/configs/densenet/densenet121_4xb256_in1k.py @@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256) # schedule settings train_cfg = dict(by_epoch=True, max_epochs=90) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/densenet/densenet161_4xb256_in1k.py b/configs/densenet/densenet161_4xb256_in1k.py index 96a14121..adbe6049 100644 --- a/configs/densenet/densenet161_4xb256_in1k.py +++ b/configs/densenet/densenet161_4xb256_in1k.py @@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256) # schedule settings train_cfg = dict(by_epoch=True, max_epochs=90) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/densenet/densenet169_4xb256_in1k.py b/configs/densenet/densenet169_4xb256_in1k.py index 74b7b868..d4fc4d07 100644 --- a/configs/densenet/densenet169_4xb256_in1k.py +++ b/configs/densenet/densenet169_4xb256_in1k.py @@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256) # schedule settings train_cfg = dict(by_epoch=True, max_epochs=90) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/densenet/densenet201_4xb256_in1k.py b/configs/densenet/densenet201_4xb256_in1k.py index 5ce4eed3..7acf02ce 100644 --- a/configs/densenet/densenet201_4xb256_in1k.py +++ b/configs/densenet/densenet201_4xb256_in1k.py @@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256) # schedule settings train_cfg = dict(by_epoch=True, max_epochs=90) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py index 26f35917..efd1133b 100644 --- a/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b0_8xb32_in1k.py b/configs/efficientnet/efficientnet-b0_8xb32_in1k.py index b88de4ee..86a3dae3 100644 --- a/configs/efficientnet/efficientnet-b0_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b0_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py index db82ac95..34b6e99b 100644 --- a/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b1_8xb32_in1k.py b/configs/efficientnet/efficientnet-b1_8xb32_in1k.py index 53651159..d026a315 100644 --- a/configs/efficientnet/efficientnet-b1_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b1_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py index 5f6485ad..5013ad11 100644 --- a/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b2_8xb32_in1k.py b/configs/efficientnet/efficientnet-b2_8xb32_in1k.py index ab389819..4092a799 100644 --- a/configs/efficientnet/efficientnet-b2_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b2_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py index cee63852..27c258b9 100644 --- a/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b3_8xb32_in1k.py b/configs/efficientnet/efficientnet-b3_8xb32_in1k.py index 55cad6ad..2fa86a03 100644 --- a/configs/efficientnet/efficientnet-b3_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b3_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py index 7d7d9b18..bf84ce89 100644 --- a/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b4_8xb32_in1k.py b/configs/efficientnet/efficientnet-b4_8xb32_in1k.py index 475daa4a..523afe12 100644 --- a/configs/efficientnet/efficientnet-b4_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b4_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py index d2d90f10..424aa82b 100644 --- a/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b5_8xb32_in1k.py b/configs/efficientnet/efficientnet-b5_8xb32_in1k.py index b548de37..95e2c9ac 100644 --- a/configs/efficientnet/efficientnet-b5_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b5_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py index cea89508..f5be6afe 100644 --- a/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b6_8xb32_in1k.py b/configs/efficientnet/efficientnet-b6_8xb32_in1k.py index eb9f9da6..0724160a 100644 --- a/configs/efficientnet/efficientnet-b6_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b6_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py index 9ed4d7a5..c3f23c97 100644 --- a/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b7_8xb32_in1k.py b/configs/efficientnet/efficientnet-b7_8xb32_in1k.py index 3f9c1fc2..31a220e3 100644 --- a/configs/efficientnet/efficientnet-b7_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b7_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py index 79e34e8e..de2c297a 100644 --- a/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-b8_8xb32_in1k.py b/configs/efficientnet/efficientnet-b8_8xb32_in1k.py index 81934303..570e7220 100644 --- a/configs/efficientnet/efficientnet-b8_8xb32_in1k.py +++ b/configs/efficientnet/efficientnet-b8_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py index a4b91426..301739a2 100644 --- a/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py @@ -29,3 +29,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py b/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py index be79f225..26112061 100644 --- a/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py +++ b/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/hrnet/hrnet-w18_4xb32_in1k.py b/configs/hrnet/hrnet-w18_4xb32_in1k.py index a84fe67f..80c96236 100644 --- a/configs/hrnet/hrnet-w18_4xb32_in1k.py +++ b/configs/hrnet/hrnet-w18_4xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/hrnet/hrnet-w30_4xb32_in1k.py b/configs/hrnet/hrnet-w30_4xb32_in1k.py index d2a9c0dd..a115b9f6 100644 --- a/configs/hrnet/hrnet-w30_4xb32_in1k.py +++ b/configs/hrnet/hrnet-w30_4xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/hrnet/hrnet-w32_4xb32_in1k.py b/configs/hrnet/hrnet-w32_4xb32_in1k.py index 91380a96..ae212efd 100644 --- a/configs/hrnet/hrnet-w32_4xb32_in1k.py +++ b/configs/hrnet/hrnet-w32_4xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/hrnet/hrnet-w40_4xb32_in1k.py b/configs/hrnet/hrnet-w40_4xb32_in1k.py index 5d35cecd..3306d9fc 100644 --- a/configs/hrnet/hrnet-w40_4xb32_in1k.py +++ b/configs/hrnet/hrnet-w40_4xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/hrnet/hrnet-w44_4xb32_in1k.py b/configs/hrnet/hrnet-w44_4xb32_in1k.py index ce6bb41a..bcbd8e72 100644 --- a/configs/hrnet/hrnet-w44_4xb32_in1k.py +++ b/configs/hrnet/hrnet-w44_4xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/hrnet/hrnet-w48_4xb32_in1k.py b/configs/hrnet/hrnet-w48_4xb32_in1k.py index 6943892e..ca488d1b 100644 --- a/configs/hrnet/hrnet-w48_4xb32_in1k.py +++ b/configs/hrnet/hrnet-w48_4xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/hrnet/hrnet-w64_4xb32_in1k.py b/configs/hrnet/hrnet-w64_4xb32_in1k.py index 0009bc67..8ce649db 100644 --- a/configs/hrnet/hrnet-w64_4xb32_in1k.py +++ b/configs/hrnet/hrnet-w64_4xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/inception_v3/inception-v3_8xb32_in1k.py b/configs/inception_v3/inception-v3_8xb32_in1k.py index 061ea6e5..4a0a32da 100644 --- a/configs/inception_v3/inception-v3_8xb32_in1k.py +++ b/configs/inception_v3/inception-v3_8xb32_in1k.py @@ -22,3 +22,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/lenet/lenet5_mnist.py b/configs/lenet/lenet5_mnist.py index 78f2ada8..c54bb435 100644 --- a/configs/lenet/lenet5_mnist.py +++ b/configs/lenet/lenet5_mnist.py @@ -84,3 +84,8 @@ load_from = None # whether to resume the training of the checkpoint resume_from = None + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py b/configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py index bbf4268d..0c6e10a9 100644 --- a/configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py +++ b/configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py @@ -6,3 +6,8 @@ _base_ = [ ] optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py b/configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py index 4fbe9c5c..60124e19 100644 --- a/configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py +++ b/configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py @@ -6,3 +6,8 @@ _base_ = [ ] optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py b/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py index afd2d979..01997276 100644 --- a/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py +++ b/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_epochstep.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/mobilenet_v3/mobilenet-v3-large_8xb32_in1k.py b/configs/mobilenet_v3/mobilenet-v3-large_8xb32_in1k.py index 23a329c2..00854327 100644 --- a/configs/mobilenet_v3/mobilenet-v3-large_8xb32_in1k.py +++ b/configs/mobilenet_v3/mobilenet-v3-large_8xb32_in1k.py @@ -21,3 +21,8 @@ param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973) train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1) val_cfg = dict() test_cfg = dict() + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/mobilenet_v3/mobilenet-v3-small_8xb16_cifar10.py b/configs/mobilenet_v3/mobilenet-v3-small_8xb16_cifar10.py index 71c72224..6b491b9d 100644 --- a/configs/mobilenet_v3/mobilenet-v3-small_8xb16_cifar10.py +++ b/configs/mobilenet_v3/mobilenet-v3-small_8xb16_cifar10.py @@ -13,3 +13,8 @@ param_scheduler = dict( ) train_cfg = dict(by_epoch=True, max_epochs=200) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/mobilenet_v3/mobilenet-v3-small_8xb32_in1k.py b/configs/mobilenet_v3/mobilenet-v3-small_8xb32_in1k.py index b724a610..549fafea 100644 --- a/configs/mobilenet_v3/mobilenet-v3-small_8xb32_in1k.py +++ b/configs/mobilenet_v3/mobilenet-v3-small_8xb32_in1k.py @@ -21,3 +21,8 @@ param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973) train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1) val_cfg = dict() test_cfg = dict() + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/poolformer/poolformer-m36_32xb128_in1k.py b/configs/poolformer/poolformer-m36_32xb128_in1k.py index 7b71d1f3..ae06c3a0 100644 --- a/configs/poolformer/poolformer-m36_32xb128_in1k.py +++ b/configs/poolformer/poolformer-m36_32xb128_in1k.py @@ -10,3 +10,8 @@ optim_wrapper = dict( optimizer=dict(lr=4e-3), clip_grad=dict(max_norm=5.0), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/poolformer/poolformer-m48_32xb128_in1k.py b/configs/poolformer/poolformer-m48_32xb128_in1k.py index 832102f6..8c3cb491 100644 --- a/configs/poolformer/poolformer-m48_32xb128_in1k.py +++ b/configs/poolformer/poolformer-m48_32xb128_in1k.py @@ -10,3 +10,8 @@ optim_wrapper = dict( optimizer=dict(lr=4e-3), clip_grad=dict(max_norm=5.0), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/poolformer/poolformer-s12_32xb128_in1k.py b/configs/poolformer/poolformer-s12_32xb128_in1k.py index b346ad8b..ea174e7d 100644 --- a/configs/poolformer/poolformer-s12_32xb128_in1k.py +++ b/configs/poolformer/poolformer-s12_32xb128_in1k.py @@ -10,3 +10,8 @@ optim_wrapper = dict( optimizer=dict(lr=4e-3), clip_grad=dict(max_norm=5.0), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/poolformer/poolformer-s24_32xb128_in1k.py b/configs/poolformer/poolformer-s24_32xb128_in1k.py index c0b17f30..fbd69bed 100644 --- a/configs/poolformer/poolformer-s24_32xb128_in1k.py +++ b/configs/poolformer/poolformer-s24_32xb128_in1k.py @@ -10,3 +10,8 @@ optim_wrapper = dict( optimizer=dict(lr=4e-3), clip_grad=dict(max_norm=5.0), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/poolformer/poolformer-s36_32xb128_in1k.py b/configs/poolformer/poolformer-s36_32xb128_in1k.py index f3487d13..2d5e30d2 100644 --- a/configs/poolformer/poolformer-s36_32xb128_in1k.py +++ b/configs/poolformer/poolformer-s36_32xb128_in1k.py @@ -10,3 +10,8 @@ optim_wrapper = dict( optimizer=dict(lr=4e-3), clip_grad=dict(max_norm=5.0), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/regnet/regnetx-1.6gf_8xb128_in1k.py b/configs/regnet/regnetx-1.6gf_8xb128_in1k.py index d3e9e934..3c11d071 100644 --- a/configs/regnet/regnetx-1.6gf_8xb128_in1k.py +++ b/configs/regnet/regnetx-1.6gf_8xb128_in1k.py @@ -4,3 +4,8 @@ _base_ = ['./regnetx-400mf_8xb128_in1k.py'] model = dict( backbone=dict(type='RegNet', arch='regnetx_1.6gf'), head=dict(in_channels=912, )) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/regnet/regnetx-12gf_8xb64_in1k.py b/configs/regnet/regnetx-12gf_8xb64_in1k.py index 2c84750f..480d8f1e 100644 --- a/configs/regnet/regnetx-12gf_8xb64_in1k.py +++ b/configs/regnet/regnetx-12gf_8xb64_in1k.py @@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64) # schedule settings # for batch_size 512, use lr = 0.4 optim_wrapper = dict(optimizer=dict(lr=0.4)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/regnet/regnetx-3.2gf_8xb64_in1k.py b/configs/regnet/regnetx-3.2gf_8xb64_in1k.py index 89101e14..713621b5 100644 --- a/configs/regnet/regnetx-3.2gf_8xb64_in1k.py +++ b/configs/regnet/regnetx-3.2gf_8xb64_in1k.py @@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64) # schedule settings # for batch_size 512, use lr = 0.4 optim_wrapper = dict(optimizer=dict(lr=0.4)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/regnet/regnetx-4.0gf_8xb64_in1k.py b/configs/regnet/regnetx-4.0gf_8xb64_in1k.py index cece654b..2ad6504d 100644 --- a/configs/regnet/regnetx-4.0gf_8xb64_in1k.py +++ b/configs/regnet/regnetx-4.0gf_8xb64_in1k.py @@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64) # schedule settings # for batch_size 512, use lr = 0.4 optim_wrapper = dict(optimizer=dict(lr=0.4)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/regnet/regnetx-400mf_8xb128_in1k.py b/configs/regnet/regnetx-400mf_8xb128_in1k.py index 9f272381..ddbc660d 100644 --- a/configs/regnet/regnetx-400mf_8xb128_in1k.py +++ b/configs/regnet/regnetx-400mf_8xb128_in1k.py @@ -53,3 +53,8 @@ custom_hooks = [ interval=1, priority='ABOVE_NORMAL') ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/regnet/regnetx-6.4gf_8xb64_in1k.py b/configs/regnet/regnetx-6.4gf_8xb64_in1k.py index 488fc40e..80f7914c 100644 --- a/configs/regnet/regnetx-6.4gf_8xb64_in1k.py +++ b/configs/regnet/regnetx-6.4gf_8xb64_in1k.py @@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64) # schedule settings # for batch_size 512, use lr = 0.4 optim_wrapper = dict(optimizer=dict(lr=0.4)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/regnet/regnetx-8.0gf_8xb64_in1k.py b/configs/regnet/regnetx-8.0gf_8xb64_in1k.py index 98ca8954..c7e4117b 100644 --- a/configs/regnet/regnetx-8.0gf_8xb64_in1k.py +++ b/configs/regnet/regnetx-8.0gf_8xb64_in1k.py @@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64) # schedule settings # for batch_size 512, use lr = 0.4 optim_wrapper = dict(optimizer=dict(lr=0.4)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/regnet/regnetx-800mf_8xb128_in1k.py b/configs/regnet/regnetx-800mf_8xb128_in1k.py index 9cd71379..6931b155 100644 --- a/configs/regnet/regnetx-800mf_8xb128_in1k.py +++ b/configs/regnet/regnetx-800mf_8xb128_in1k.py @@ -4,3 +4,8 @@ _base_ = ['./regnetx-400mf_8xb128_in1k.py'] model = dict( backbone=dict(type='RegNet', arch='regnetx_800mf'), head=dict(in_channels=672, )) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/repmlp/repmlp-base_8xb64_in1k-256px.py b/configs/repmlp/repmlp-base_8xb64_in1k-256px.py index 86f1edc3..abba9bb0 100644 --- a/configs/repmlp/repmlp-base_8xb64_in1k-256px.py +++ b/configs/repmlp/repmlp-base_8xb64_in1k-256px.py @@ -29,3 +29,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/repmlp/repmlp-base_8xb64_in1k.py b/configs/repmlp/repmlp-base_8xb64_in1k.py index ad6e791a..a55d19ea 100644 --- a/configs/repmlp/repmlp-base_8xb64_in1k.py +++ b/configs/repmlp/repmlp-base_8xb64_in1k.py @@ -19,3 +19,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/repmlp/repmlp-base_delopy_8xb64_in1k.py b/configs/repmlp/repmlp-base_delopy_8xb64_in1k.py index b5b2c882..01a1ed3e 100644 --- a/configs/repmlp/repmlp-base_delopy_8xb64_in1k.py +++ b/configs/repmlp/repmlp-base_delopy_8xb64_in1k.py @@ -1,3 +1,8 @@ _base_ = ['./repmlp-base_8xb64_in1k.py'] model = dict(backbone=dict(deploy=True)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/repmlp/repmlp-base_deploy_8xb64_in1k-256px.py b/configs/repmlp/repmlp-base_deploy_8xb64_in1k-256px.py index 27ff50a0..1ce9c238 100644 --- a/configs/repmlp/repmlp-base_deploy_8xb64_in1k-256px.py +++ b/configs/repmlp/repmlp-base_deploy_8xb64_in1k-256px.py @@ -1,3 +1,8 @@ _base_ = ['./repmlp-base_8xb64_in1k-256px.py'] model = dict(backbone=dict(deploy=True)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py index 8a93ed0a..ca4cb757 100644 --- a/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py @@ -10,3 +10,8 @@ param_scheduler = dict( type='CosineAnnealingLR', T_max=120, by_epoch=True, begin=0, end=120) train_cfg = dict(by_epoch=True, max_epochs=120) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-A1_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-A1_4xb64-coslr-120e_in1k.py index 649020f2..5eb1c8bf 100644 --- a/configs/repvgg/repvgg-A1_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-A1_4xb64-coslr-120e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py' model = dict(backbone=dict(arch='A1')) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-A2_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-A2_4xb64-coslr-120e_in1k.py index eedaf2d2..2f955dad 100644 --- a/configs/repvgg/repvgg-A2_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-A2_4xb64-coslr-120e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py' model = dict(backbone=dict(arch='A2'), head=dict(in_channels=1408)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B0_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-B0_4xb64-coslr-120e_in1k.py index b3ce7ea2..209f3ad3 100644 --- a/configs/repvgg/repvgg-B0_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-B0_4xb64-coslr-120e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py' model = dict(backbone=dict(arch='B0'), head=dict(in_channels=1280)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B1_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-B1_4xb64-coslr-120e_in1k.py index 30adea3d..3270d19b 100644 --- a/configs/repvgg/repvgg-B1_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-B1_4xb64-coslr-120e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py' model = dict(backbone=dict(arch='B1'), head=dict(in_channels=2048)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B1g2_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-B1g2_4xb64-coslr-120e_in1k.py index 2749db8d..10a6847d 100644 --- a/configs/repvgg/repvgg-B1g2_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-B1g2_4xb64-coslr-120e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py' model = dict(backbone=dict(arch='B1g2'), head=dict(in_channels=2048)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B1g4_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-B1g4_4xb64-coslr-120e_in1k.py index 26476909..cef5d8a0 100644 --- a/configs/repvgg/repvgg-B1g4_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-B1g4_4xb64-coslr-120e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py' model = dict(backbone=dict(arch='B1g4'), head=dict(in_channels=2048)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B2_4xb64-coslr-120e_in1k.py b/configs/repvgg/repvgg-B2_4xb64-coslr-120e_in1k.py index 4d215567..2800e320 100644 --- a/configs/repvgg/repvgg-B2_4xb64-coslr-120e_in1k.py +++ b/configs/repvgg/repvgg-B2_4xb64-coslr-120e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py' model = dict(backbone=dict(arch='B2'), head=dict(in_channels=2560)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py b/configs/repvgg/repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py index 11331cf0..993d97a2 100644 --- a/configs/repvgg/repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py +++ b/configs/repvgg/repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py' model = dict(backbone=dict(arch='B2g4')) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py b/configs/repvgg/repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py index 97334aff..8b7ed746 100644 --- a/configs/repvgg/repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py +++ b/configs/repvgg/repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py @@ -37,3 +37,8 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py b/configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py index 67e3688c..467cd9f4 100644 --- a/configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py +++ b/configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py' model = dict(backbone=dict(arch='B3g4')) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/repvgg/repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py b/configs/repvgg/repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py index d235610f..fadc0803 100644 --- a/configs/repvgg/repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py +++ b/configs/repvgg/repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py @@ -1,3 +1,8 @@ _base_ = './repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py' model = dict(backbone=dict(arch='D2se')) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (4 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/res2net/res2net101-w26-s4_8xb32_in1k.py b/configs/res2net/res2net101-w26-s4_8xb32_in1k.py index 7ebe9e94..a3708f21 100644 --- a/configs/res2net/res2net101-w26-s4_8xb32_in1k.py +++ b/configs/res2net/res2net101-w26-s4_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/res2net/res2net50-w14-s8_8xb32_in1k.py b/configs/res2net/res2net50-w14-s8_8xb32_in1k.py index 56cc02e3..4aa8241d 100644 --- a/configs/res2net/res2net50-w14-s8_8xb32_in1k.py +++ b/configs/res2net/res2net50-w14-s8_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/res2net/res2net50-w26-s8_8xb32_in1k.py b/configs/res2net/res2net50-w26-s8_8xb32_in1k.py index d7dcbeb9..18896bb6 100644 --- a/configs/res2net/res2net50-w26-s8_8xb32_in1k.py +++ b/configs/res2net/res2net50-w26-s8_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnest/resnest101_32xb64_in1k.py b/configs/resnest/resnest101_32xb64_in1k.py index a2a6ca95..7b813f76 100644 --- a/configs/resnest/resnest101_32xb64_in1k.py +++ b/configs/resnest/resnest101_32xb64_in1k.py @@ -71,3 +71,8 @@ param_scheduler = [ ] train_cfg = dict(by_epoch=True, max_epochs=270) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (32 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnest/resnest200_64xb32_in1k.py b/configs/resnest/resnest200_64xb32_in1k.py index 65fc3d9f..334710e5 100644 --- a/configs/resnest/resnest200_64xb32_in1k.py +++ b/configs/resnest/resnest200_64xb32_in1k.py @@ -67,3 +67,8 @@ param_scheduler = [ ] train_cfg = dict(by_epoch=True, max_epochs=270) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnest/resnest269_64xb32_in1k.py b/configs/resnest/resnest269_64xb32_in1k.py index 08e85315..d4ef4d1f 100644 --- a/configs/resnest/resnest269_64xb32_in1k.py +++ b/configs/resnest/resnest269_64xb32_in1k.py @@ -71,3 +71,8 @@ param_scheduler = [ ] train_cfg = dict(by_epoch=True, max_epochs=270) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnest/resnest50_32xb64_in1k.py b/configs/resnest/resnest50_32xb64_in1k.py index e8096517..82675003 100644 --- a/configs/resnest/resnest50_32xb64_in1k.py +++ b/configs/resnest/resnest50_32xb64_in1k.py @@ -71,3 +71,8 @@ param_scheduler = [ ] train_cfg = dict(by_epoch=True, max_epochs=270) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (32 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnet/resnet101_8xb16_cifar10.py b/configs/resnet/resnet101_8xb16_cifar10.py index 166a1740..5d57c0e2 100644 --- a/configs/resnet/resnet101_8xb16_cifar10.py +++ b/configs/resnet/resnet101_8xb16_cifar10.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/cifar10_bs16.py', '../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/resnet/resnet101_8xb32_in1k.py b/configs/resnet/resnet101_8xb32_in1k.py index 388d2cd9..812134f6 100644 --- a/configs/resnet/resnet101_8xb32_in1k.py +++ b/configs/resnet/resnet101_8xb32_in1k.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet101.py', '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet152_8xb16_cifar10.py b/configs/resnet/resnet152_8xb16_cifar10.py index 3f307b6a..12a0a5e5 100644 --- a/configs/resnet/resnet152_8xb16_cifar10.py +++ b/configs/resnet/resnet152_8xb16_cifar10.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/cifar10_bs16.py', '../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/resnet/resnet152_8xb32_in1k.py b/configs/resnet/resnet152_8xb32_in1k.py index cc9dc2ce..46d39fc4 100644 --- a/configs/resnet/resnet152_8xb32_in1k.py +++ b/configs/resnet/resnet152_8xb32_in1k.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet152.py', '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet18_8xb16_cifar10.py b/configs/resnet/resnet18_8xb16_cifar10.py index c7afa397..ae0264e1 100644 --- a/configs/resnet/resnet18_8xb16_cifar10.py +++ b/configs/resnet/resnet18_8xb16_cifar10.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet18_cifar.py', '../_base_/datasets/cifar10_bs16.py', '../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/resnet/resnet18_8xb32_in1k.py b/configs/resnet/resnet18_8xb32_in1k.py index ac452ff7..383638f1 100644 --- a/configs/resnet/resnet18_8xb32_in1k.py +++ b/configs/resnet/resnet18_8xb32_in1k.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet18.py', '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet34_8xb16_cifar10.py b/configs/resnet/resnet34_8xb16_cifar10.py index 7f5cd517..b1f8b11c 100644 --- a/configs/resnet/resnet34_8xb16_cifar10.py +++ b/configs/resnet/resnet34_8xb16_cifar10.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet34_cifar.py', '../_base_/datasets/cifar10_bs16.py', '../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/resnet/resnet34_8xb32_in1k.py b/configs/resnet/resnet34_8xb32_in1k.py index 7749261c..0634c555 100644 --- a/configs/resnet/resnet34_8xb32_in1k.py +++ b/configs/resnet/resnet34_8xb32_in1k.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet34.py', '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_32xb64-warmup-coslr_in1k.py b/configs/resnet/resnet50_32xb64-warmup-coslr_in1k.py index c26245ef..12c07fcc 100644 --- a/configs/resnet/resnet50_32xb64-warmup-coslr_in1k.py +++ b/configs/resnet/resnet50_32xb64-warmup-coslr_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs2048_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (32 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnet/resnet50_32xb64-warmup-lbs_in1k.py b/configs/resnet/resnet50_32xb64-warmup-lbs_in1k.py index 2f24f9a0..24168cc5 100644 --- a/configs/resnet/resnet50_32xb64-warmup-lbs_in1k.py +++ b/configs/resnet/resnet50_32xb64-warmup-lbs_in1k.py @@ -10,3 +10,8 @@ model = dict( label_smooth_val=0.1, num_classes=1000), )) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnet/resnet50_32xb64-warmup_in1k.py b/configs/resnet/resnet50_32xb64-warmup_in1k.py index 34d5288b..6af884d7 100644 --- a/configs/resnet/resnet50_32xb64-warmup_in1k.py +++ b/configs/resnet/resnet50_32xb64-warmup_in1k.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet50.py', '../_base_/datasets/imagenet_bs64.py', '../_base_/schedules/imagenet_bs2048.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnet/resnet50_8xb128_coslr-90e_in21k.py b/configs/resnet/resnet50_8xb128_coslr-90e_in21k.py index d2cc1ee2..ff4c1511 100644 --- a/configs/resnet/resnet50_8xb128_coslr-90e_in21k.py +++ b/configs/resnet/resnet50_8xb128_coslr-90e_in21k.py @@ -9,3 +9,8 @@ model = dict(head=dict(num_classes=21843)) # runtime settings train_cfg = dict(by_epoch=True, max_epochs=90) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/resnet/resnet50_8xb16-mixup_cifar10.py b/configs/resnet/resnet50_8xb16-mixup_cifar10.py index 2420ebfe..e5b480fc 100644 --- a/configs/resnet/resnet50_8xb16-mixup_cifar10.py +++ b/configs/resnet/resnet50_8xb16-mixup_cifar10.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/cifar10_bs16.py', '../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/resnet/resnet50_8xb16_cifar10.py b/configs/resnet/resnet50_8xb16_cifar10.py index 669e5de2..e1766c43 100644 --- a/configs/resnet/resnet50_8xb16_cifar10.py +++ b/configs/resnet/resnet50_8xb16_cifar10.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet50_cifar.py', '../_base_/datasets/cifar10_bs16.py', '../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/resnet/resnet50_8xb16_cifar100.py b/configs/resnet/resnet50_8xb16_cifar100.py index ebde6c76..7c740b3c 100644 --- a/configs/resnet/resnet50_8xb16_cifar100.py +++ b/configs/resnet/resnet50_8xb16_cifar100.py @@ -17,3 +17,8 @@ param_scheduler = dict( milestones=[60, 120, 160], gamma=0.2, ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/resnet/resnet50_8xb256-rsb-a1-600e_in1k.py b/configs/resnet/resnet50_8xb256-rsb-a1-600e_in1k.py index 3d2d5894..e0c90293 100644 --- a/configs/resnet/resnet50_8xb256-rsb-a1-600e_in1k.py +++ b/configs/resnet/resnet50_8xb256-rsb-a1-600e_in1k.py @@ -53,3 +53,8 @@ param_scheduler = [ ] train_cfg = dict(by_epoch=True, max_epochs=600) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnet/resnet50_8xb256-rsb-a2-300e_in1k.py b/configs/resnet/resnet50_8xb256-rsb-a2-300e_in1k.py index a8e93003..f4330385 100644 --- a/configs/resnet/resnet50_8xb256-rsb-a2-300e_in1k.py +++ b/configs/resnet/resnet50_8xb256-rsb-a2-300e_in1k.py @@ -44,3 +44,8 @@ param_scheduler = [ end=300) ] train_cfg = dict(by_epoch=True, max_epochs=300) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnet/resnet50_8xb256-rsb-a3-100e_in1k.py b/configs/resnet/resnet50_8xb256-rsb-a3-100e_in1k.py index e6872a3b..00c44b8c 100644 --- a/configs/resnet/resnet50_8xb256-rsb-a3-100e_in1k.py +++ b/configs/resnet/resnet50_8xb256-rsb-a3-100e_in1k.py @@ -20,3 +20,8 @@ optim_wrapper = dict( optimizer=dict(lr=0.008), paramwise_cfg=dict(bias_decay_mult=0., norm_decay_mult=0.), ) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (256 samples per GPU) +auto_scale_lr = dict(base_batch_size=2048) diff --git a/configs/resnet/resnet50_8xb32-coslr-preciseBN_in1k.py b/configs/resnet/resnet50_8xb32-coslr-preciseBN_in1k.py index dab82c6e..58c20def 100644 --- a/configs/resnet/resnet50_8xb32-coslr-preciseBN_in1k.py +++ b/configs/resnet/resnet50_8xb32-coslr-preciseBN_in1k.py @@ -10,3 +10,8 @@ custom_hooks = [ interval=1, priority='ABOVE_NORMAL') ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb32-coslr_in1k.py b/configs/resnet/resnet50_8xb32-coslr_in1k.py index 938a114b..45bcdc68 100644 --- a/configs/resnet/resnet50_8xb32-coslr_in1k.py +++ b/configs/resnet/resnet50_8xb32-coslr_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_coslr.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb32-cutmix_in1k.py b/configs/resnet/resnet50_8xb32-cutmix_in1k.py index 2f8d0ca9..1db9ecb6 100644 --- a/configs/resnet/resnet50_8xb32-cutmix_in1k.py +++ b/configs/resnet/resnet50_8xb32-cutmix_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb32-fp16-dynamic_in1k.py b/configs/resnet/resnet50_8xb32-fp16-dynamic_in1k.py index 58f6fe4c..ce90a63a 100644 --- a/configs/resnet/resnet50_8xb32-fp16-dynamic_in1k.py +++ b/configs/resnet/resnet50_8xb32-fp16-dynamic_in1k.py @@ -2,3 +2,8 @@ _base_ = ['./resnet50_8xb32_in1k.py'] # schedule settings optim_wrapper = dict(type='AmpOptimWrapper', loss_scale='dynamic') + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb32-fp16_in1k.py b/configs/resnet/resnet50_8xb32-fp16_in1k.py index 19ee6ee4..592e37ef 100644 --- a/configs/resnet/resnet50_8xb32-fp16_in1k.py +++ b/configs/resnet/resnet50_8xb32-fp16_in1k.py @@ -2,3 +2,8 @@ _base_ = ['./resnet50_8xb32_in1k.py'] # schedule settings optim_wrapper = dict(type='AmpOptimWrapper', loss_scale=512.) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb32-lbs_in1k.py b/configs/resnet/resnet50_8xb32-lbs_in1k.py index 1c1aa5a2..fef90281 100644 --- a/configs/resnet/resnet50_8xb32-lbs_in1k.py +++ b/configs/resnet/resnet50_8xb32-lbs_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb32-mixup_in1k.py b/configs/resnet/resnet50_8xb32-mixup_in1k.py index 2a153d0e..b2f3e141 100644 --- a/configs/resnet/resnet50_8xb32-mixup_in1k.py +++ b/configs/resnet/resnet50_8xb32-mixup_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb32_in1k.py b/configs/resnet/resnet50_8xb32_in1k.py index c32f333b..c01f2655 100644 --- a/configs/resnet/resnet50_8xb32_in1k.py +++ b/configs/resnet/resnet50_8xb32_in1k.py @@ -2,3 +2,8 @@ _base_ = [ '../_base_/models/resnet50.py', '../_base_/datasets/imagenet_bs32.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnet50_8xb8_cub.py b/configs/resnet/resnet50_8xb8_cub.py index 17054ef5..02f1ff5b 100644 --- a/configs/resnet/resnet50_8xb8_cub.py +++ b/configs/resnet/resnet50_8xb8_cub.py @@ -18,3 +18,8 @@ model = dict( # runtime settings default_hooks = dict(logger=dict(type='LoggerHook', interval=20)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/resnet/resnetv1c101_8xb32_in1k.py b/configs/resnet/resnetv1c101_8xb32_in1k.py index 441aff59..d8cf28bc 100644 --- a/configs/resnet/resnetv1c101_8xb32_in1k.py +++ b/configs/resnet/resnetv1c101_8xb32_in1k.py @@ -5,3 +5,8 @@ _base_ = [ ] model = dict(backbone=dict(depth=101)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnetv1c152_8xb32_in1k.py b/configs/resnet/resnetv1c152_8xb32_in1k.py index b9f466f8..e16345b2 100644 --- a/configs/resnet/resnetv1c152_8xb32_in1k.py +++ b/configs/resnet/resnetv1c152_8xb32_in1k.py @@ -5,3 +5,8 @@ _base_ = [ ] model = dict(backbone=dict(depth=152)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnetv1c50_8xb32_in1k.py b/configs/resnet/resnetv1c50_8xb32_in1k.py index aa1c8b64..06e0c613 100644 --- a/configs/resnet/resnetv1c50_8xb32_in1k.py +++ b/configs/resnet/resnetv1c50_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnetv1d101_8xb32_in1k.py b/configs/resnet/resnetv1d101_8xb32_in1k.py index b16ca863..307a7700 100644 --- a/configs/resnet/resnetv1d101_8xb32_in1k.py +++ b/configs/resnet/resnetv1d101_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnetv1d152_8xb32_in1k.py b/configs/resnet/resnetv1d152_8xb32_in1k.py index 76926ddb..1de344e6 100644 --- a/configs/resnet/resnetv1d152_8xb32_in1k.py +++ b/configs/resnet/resnetv1d152_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnet/resnetv1d50_8xb32_in1k.py b/configs/resnet/resnetv1d50_8xb32_in1k.py index 208bde47..cb672555 100644 --- a/configs/resnet/resnetv1d50_8xb32_in1k.py +++ b/configs/resnet/resnetv1d50_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnext/resnext101-32x4d_8xb32_in1k.py b/configs/resnext/resnext101-32x4d_8xb32_in1k.py index 970aa60f..c8691cae 100644 --- a/configs/resnext/resnext101-32x4d_8xb32_in1k.py +++ b/configs/resnext/resnext101-32x4d_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnext/resnext101-32x8d_8xb32_in1k.py b/configs/resnext/resnext101-32x8d_8xb32_in1k.py index 315d05fd..45bf4f79 100644 --- a/configs/resnext/resnext101-32x8d_8xb32_in1k.py +++ b/configs/resnext/resnext101-32x8d_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnext/resnext152-32x4d_8xb32_in1k.py b/configs/resnext/resnext152-32x4d_8xb32_in1k.py index 9c137313..918a7dee 100644 --- a/configs/resnext/resnext152-32x4d_8xb32_in1k.py +++ b/configs/resnext/resnext152-32x4d_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/resnext/resnext50-32x4d_8xb32_in1k.py b/configs/resnext/resnext50-32x4d_8xb32_in1k.py index bd9c9fcf..298cd320 100644 --- a/configs/resnext/resnext50-32x4d_8xb32_in1k.py +++ b/configs/resnext/resnext50-32x4d_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/seresnet/seresnet101_8xb32_in1k.py b/configs/seresnet/seresnet101_8xb32_in1k.py index 8be39e7a..f2f99336 100644 --- a/configs/seresnet/seresnet101_8xb32_in1k.py +++ b/configs/seresnet/seresnet101_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/seresnet/seresnet50_8xb32_in1k.py b/configs/seresnet/seresnet50_8xb32_in1k.py index 19082bd0..7159f617 100644 --- a/configs/seresnet/seresnet50_8xb32_in1k.py +++ b/configs/seresnet/seresnet50_8xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256_140e.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/seresnet/seresnext101-32x4d_8xb32_in1k.py b/configs/seresnet/seresnext101-32x4d_8xb32_in1k.py index 01778305..b89f464a 100644 --- a/configs/seresnet/seresnext101-32x4d_8xb32_in1k.py +++ b/configs/seresnet/seresnext101-32x4d_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/seresnet/seresnext50-32x4d_8xb32_in1k.py b/configs/seresnet/seresnext50-32x4d_8xb32_in1k.py index 4d593e45..73ebbdf3 100644 --- a/configs/seresnet/seresnext50-32x4d_8xb32_in1k.py +++ b/configs/seresnet/seresnext50-32x4d_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py b/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py index 58e45f1b..a6ada1e0 100644 --- a/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py +++ b/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs1024_linearlr_bn_nowd.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py b/configs/shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py index a106ab86..99d81e38 100644 --- a/configs/shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py +++ b/configs/shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs1024_linearlr_bn_nowd.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/swin_transformer/swin-base_16xb64_in1k-384px.py b/configs/swin_transformer/swin-base_16xb64_in1k-384px.py index 10f89921..1df65856 100644 --- a/configs/swin_transformer/swin-base_16xb64_in1k-384px.py +++ b/configs/swin_transformer/swin-base_16xb64_in1k-384px.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/swin_transformer/swin-base_16xb64_in1k.py b/configs/swin_transformer/swin-base_16xb64_in1k.py index 05a95b44..a0a566f5 100644 --- a/configs/swin_transformer/swin-base_16xb64_in1k.py +++ b/configs/swin_transformer/swin-base_16xb64_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/swin_transformer/swin-large_16xb64_in1k-384px.py b/configs/swin_transformer/swin-large_16xb64_in1k-384px.py index 5ba52b35..fd9c22ac 100644 --- a/configs/swin_transformer/swin-large_16xb64_in1k-384px.py +++ b/configs/swin_transformer/swin-large_16xb64_in1k-384px.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/swin_transformer/swin-large_16xb64_in1k.py b/configs/swin_transformer/swin-large_16xb64_in1k.py index 36121efc..db25a000 100644 --- a/configs/swin_transformer/swin-large_16xb64_in1k.py +++ b/configs/swin_transformer/swin-large_16xb64_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/swin_transformer/swin-large_8xb8_cub_384px.py b/configs/swin_transformer/swin-large_8xb8_cub_384px.py index 7af5b53b..0f957315 100644 --- a/configs/swin_transformer/swin-large_8xb8_cub_384px.py +++ b/configs/swin_transformer/swin-large_8xb8_cub_384px.py @@ -37,3 +37,8 @@ default_hooks = dict( logger=dict(type='LoggerHook', interval=20), # save last three checkpoints checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/swin_transformer/swin-small_16xb64_in1k.py b/configs/swin_transformer/swin-small_16xb64_in1k.py index 7c1a8e21..9bbb3fef 100644 --- a/configs/swin_transformer/swin-small_16xb64_in1k.py +++ b/configs/swin_transformer/swin-small_16xb64_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/swin_transformer/swin-tiny_16xb64_in1k.py b/configs/swin_transformer/swin-tiny_16xb64_in1k.py index 9a1ce250..bb9646c9 100644 --- a/configs/swin_transformer/swin-tiny_16xb64_in1k.py +++ b/configs/swin_transformer/swin-tiny_16xb64_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/t2t_vit/t2t-vit-t-14_8xb64_in1k.py b/configs/t2t_vit/t2t-vit-t-14_8xb64_in1k.py index 193b7775..6d6c5b5b 100644 --- a/configs/t2t_vit/t2t-vit-t-14_8xb64_in1k.py +++ b/configs/t2t_vit/t2t-vit-t-14_8xb64_in1k.py @@ -42,3 +42,8 @@ test_cfg = dict() # runtime settings custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/t2t_vit/t2t-vit-t-19_8xb64_in1k.py b/configs/t2t_vit/t2t-vit-t-19_8xb64_in1k.py index 8fce1f3a..dd8b4cf7 100644 --- a/configs/t2t_vit/t2t-vit-t-19_8xb64_in1k.py +++ b/configs/t2t_vit/t2t-vit-t-19_8xb64_in1k.py @@ -42,3 +42,8 @@ test_cfg = dict() # runtime settings custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/t2t_vit/t2t-vit-t-24_8xb64_in1k.py b/configs/t2t_vit/t2t-vit-t-24_8xb64_in1k.py index c024b4a1..630c752b 100644 --- a/configs/t2t_vit/t2t-vit-t-24_8xb64_in1k.py +++ b/configs/t2t_vit/t2t-vit-t-24_8xb64_in1k.py @@ -42,3 +42,8 @@ test_cfg = dict() # runtime settings custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=512) diff --git a/configs/tnt/tnt-s-p16_16xb64_in1k.py b/configs/tnt/tnt-s-p16_16xb64_in1k.py index 50412868..0a7518a6 100644 --- a/configs/tnt/tnt-s-p16_16xb64_in1k.py +++ b/configs/tnt/tnt-s-p16_16xb64_in1k.py @@ -49,3 +49,8 @@ param_scheduler = [ train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1) val_cfg = dict() test_cfg = dict() + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/twins/twins-pcpvt-base_8xb128_in1k.py b/configs/twins/twins-pcpvt-base_8xb128_in1k.py index 3ac5d2ad..a52a0ea2 100644 --- a/configs/twins/twins-pcpvt-base_8xb128_in1k.py +++ b/configs/twins/twins-pcpvt-base_8xb128_in1k.py @@ -39,3 +39,8 @@ param_scheduler = [ begin=5, end=300) ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/twins/twins-pcpvt-large_16xb64_in1k.py b/configs/twins/twins-pcpvt-large_16xb64_in1k.py index b5ad5472..c136c02c 100644 --- a/configs/twins/twins-pcpvt-large_16xb64_in1k.py +++ b/configs/twins/twins-pcpvt-large_16xb64_in1k.py @@ -5,3 +5,8 @@ model = dict(backbone=dict(arch='large'), head=dict(in_channels=512)) # dataset settings train_dataloader = dict(batch_size=128) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/twins/twins-pcpvt-small_8xb128_in1k.py b/configs/twins/twins-pcpvt-small_8xb128_in1k.py index 9fe763b7..5530e397 100644 --- a/configs/twins/twins-pcpvt-small_8xb128_in1k.py +++ b/configs/twins/twins-pcpvt-small_8xb128_in1k.py @@ -2,3 +2,8 @@ _base_ = ['twins-pcpvt-base_8xb128_in1k.py'] # model settings model = dict(backbone=dict(arch='small'), head=dict(in_channels=512)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/twins/twins-svt-base_8xb128_in1k.py b/configs/twins/twins-svt-base_8xb128_in1k.py index 1d24f63b..0f00981f 100644 --- a/configs/twins/twins-svt-base_8xb128_in1k.py +++ b/configs/twins/twins-svt-base_8xb128_in1k.py @@ -39,3 +39,8 @@ param_scheduler = [ begin=5, end=300) ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/twins/twins-svt-large_16xb64_in1k.py b/configs/twins/twins-svt-large_16xb64_in1k.py index e8a1eba8..e32dbe11 100644 --- a/configs/twins/twins-svt-large_16xb64_in1k.py +++ b/configs/twins/twins-svt-large_16xb64_in1k.py @@ -5,3 +5,8 @@ model = dict(backbone=dict(arch='large'), head=dict(in_channels=1024)) # dataset settings train_dataloader = dict(batch_size=64) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (16 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/twins/twins-svt-small_8xb128_in1k.py b/configs/twins/twins-svt-small_8xb128_in1k.py index 2ffe267b..97c5a945 100644 --- a/configs/twins/twins-svt-small_8xb128_in1k.py +++ b/configs/twins/twins-svt-small_8xb128_in1k.py @@ -2,3 +2,8 @@ _base_ = ['twins-svt-base_8xb128_in1k.py'] # model settings model = dict(backbone=dict(arch='small'), head=dict(in_channels=512)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/van/van-base_8xb128_in1k.py b/configs/van/van-base_8xb128_in1k.py index 3099dc37..4dd53c58 100644 --- a/configs/van/van-base_8xb128_in1k.py +++ b/configs/van/van-base_8xb128_in1k.py @@ -63,3 +63,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/van/van-large_8xb128_in1k.py b/configs/van/van-large_8xb128_in1k.py index a843b732..7341329b 100644 --- a/configs/van/van-large_8xb128_in1k.py +++ b/configs/van/van-large_8xb128_in1k.py @@ -63,3 +63,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/van/van-small_8xb128_in1k.py b/configs/van/van-small_8xb128_in1k.py index 04322ae7..29e14e34 100644 --- a/configs/van/van-small_8xb128_in1k.py +++ b/configs/van/van-small_8xb128_in1k.py @@ -63,3 +63,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/van/van-tiny_8xb128_in1k.py b/configs/van/van-tiny_8xb128_in1k.py index 1d2d799e..f1eb385b 100644 --- a/configs/van/van-tiny_8xb128_in1k.py +++ b/configs/van/van-tiny_8xb128_in1k.py @@ -63,3 +63,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule settings optim_wrapper = dict(clip_grad=dict(max_norm=5.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=1024) diff --git a/configs/vgg/vgg11_8xb32_in1k.py b/configs/vgg/vgg11_8xb32_in1k.py index 616233c4..08e2c9d9 100644 --- a/configs/vgg/vgg11_8xb32_in1k.py +++ b/configs/vgg/vgg11_8xb32_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(optimizer=dict(lr=0.01)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vgg/vgg11bn_8xb32_in1k.py b/configs/vgg/vgg11bn_8xb32_in1k.py index 22f55ef0..dec892c8 100644 --- a/configs/vgg/vgg11bn_8xb32_in1k.py +++ b/configs/vgg/vgg11bn_8xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py', ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vgg/vgg13_8xb32_in1k.py b/configs/vgg/vgg13_8xb32_in1k.py index ec1c98fb..8602b255 100644 --- a/configs/vgg/vgg13_8xb32_in1k.py +++ b/configs/vgg/vgg13_8xb32_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(optimizer=dict(lr=0.01)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vgg/vgg13bn_8xb32_in1k.py b/configs/vgg/vgg13bn_8xb32_in1k.py index 3cb3592b..55096eb3 100644 --- a/configs/vgg/vgg13bn_8xb32_in1k.py +++ b/configs/vgg/vgg13bn_8xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py', ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vgg/vgg16_8xb16_voc.py b/configs/vgg/vgg16_8xb16_voc.py index 22b1891d..e4541979 100644 --- a/configs/vgg/vgg16_8xb16_voc.py +++ b/configs/vgg/vgg16_8xb16_voc.py @@ -36,3 +36,8 @@ param_scheduler = dict(type='StepLR', by_epoch=True, step_size=20, gamma=0.1) train_cfg = dict(by_epoch=True, max_epochs=40, val_interval=1) val_cfg = dict() test_cfg = dict() + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/configs/vgg/vgg16_8xb32_in1k.py b/configs/vgg/vgg16_8xb32_in1k.py index a291da28..80c8f11b 100644 --- a/configs/vgg/vgg16_8xb32_in1k.py +++ b/configs/vgg/vgg16_8xb32_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(optimizer=dict(lr=0.01)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vgg/vgg16bn_8xb32_in1k.py b/configs/vgg/vgg16bn_8xb32_in1k.py index f6bbb81b..fce036c8 100644 --- a/configs/vgg/vgg16bn_8xb32_in1k.py +++ b/configs/vgg/vgg16bn_8xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py', ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vgg/vgg19_8xb32_in1k.py b/configs/vgg/vgg19_8xb32_in1k.py index 88cd24c1..50bcb2cc 100644 --- a/configs/vgg/vgg19_8xb32_in1k.py +++ b/configs/vgg/vgg19_8xb32_in1k.py @@ -7,3 +7,8 @@ _base_ = [ # schedule settings optim_wrapper = dict(optimizer=dict(lr=0.01)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vgg/vgg19bn_8xb32_in1k.py b/configs/vgg/vgg19bn_8xb32_in1k.py index 4b4f34ab..b9defa93 100644 --- a/configs/vgg/vgg19bn_8xb32_in1k.py +++ b/configs/vgg/vgg19bn_8xb32_in1k.py @@ -4,3 +4,8 @@ _base_ = [ '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py', ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py b/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py index 539fecc7..a4cd4c92 100644 --- a/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py +++ b/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py @@ -36,3 +36,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-base-p16_pt-32xb128-mae_in1k-224.py b/configs/vision_transformer/vit-base-p16_pt-32xb128-mae_in1k-224.py index 68e8b02b..4dbd9a46 100644 --- a/configs/vision_transformer/vit-base-p16_pt-32xb128-mae_in1k-224.py +++ b/configs/vision_transformer/vit-base-p16_pt-32xb128-mae_in1k-224.py @@ -51,3 +51,8 @@ optim_wrapper = dict( # runtime settings custom_hooks = [dict(type='EMAHook', momentum=1e-4)] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (32 GPUs) x (128 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-base-p16_pt-64xb64_in1k-224.py b/configs/vision_transformer/vit-base-p16_pt-64xb64_in1k-224.py index 0a9e5156..d25281b8 100644 --- a/configs/vision_transformer/vit-base-p16_pt-64xb64_in1k-224.py +++ b/configs/vision_transformer/vit-base-p16_pt-64xb64_in1k-224.py @@ -13,3 +13,8 @@ model = dict( # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-base-p32_ft-64xb64_in1k-384.py b/configs/vision_transformer/vit-base-p32_ft-64xb64_in1k-384.py index 2322c226..53ae6adf 100644 --- a/configs/vision_transformer/vit-base-p32_ft-64xb64_in1k-384.py +++ b/configs/vision_transformer/vit-base-p32_ft-64xb64_in1k-384.py @@ -36,3 +36,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-base-p32_pt-64xb64_in1k-224.py b/configs/vision_transformer/vit-base-p32_pt-64xb64_in1k-224.py index 83a92fca..f6ba5174 100644 --- a/configs/vision_transformer/vit-base-p32_pt-64xb64_in1k-224.py +++ b/configs/vision_transformer/vit-base-p32_pt-64xb64_in1k-224.py @@ -13,3 +13,8 @@ model = dict( # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-large-p16_ft-64xb64_in1k-384.py b/configs/vision_transformer/vit-large-p16_ft-64xb64_in1k-384.py index 917117c7..e4e4b25f 100644 --- a/configs/vision_transformer/vit-large-p16_ft-64xb64_in1k-384.py +++ b/configs/vision_transformer/vit-large-p16_ft-64xb64_in1k-384.py @@ -36,3 +36,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-large-p16_pt-64xb64_in1k-224.py b/configs/vision_transformer/vit-large-p16_pt-64xb64_in1k-224.py index 0cf9d8e1..e10336aa 100644 --- a/configs/vision_transformer/vit-large-p16_pt-64xb64_in1k-224.py +++ b/configs/vision_transformer/vit-large-p16_pt-64xb64_in1k-224.py @@ -13,3 +13,8 @@ model = dict( # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-large-p32_ft-64xb64_in1k-384.py b/configs/vision_transformer/vit-large-p32_ft-64xb64_in1k-384.py index 65bdf41e..5afd05de 100644 --- a/configs/vision_transformer/vit-large-p32_ft-64xb64_in1k-384.py +++ b/configs/vision_transformer/vit-large-p32_ft-64xb64_in1k-384.py @@ -36,3 +36,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline)) # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/vision_transformer/vit-large-p32_pt-64xb64_in1k-224.py b/configs/vision_transformer/vit-large-p32_pt-64xb64_in1k-224.py index c1b5a3d8..fc31b490 100644 --- a/configs/vision_transformer/vit-large-p32_pt-64xb64_in1k-224.py +++ b/configs/vision_transformer/vit-large-p32_pt-64xb64_in1k-224.py @@ -13,3 +13,8 @@ model = dict( # schedule setting optim_wrapper = dict(clip_grad=dict(max_norm=1.0)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
+# base_batch_size = (64 GPUs) x (64 samples per GPU) +auto_scale_lr = dict(base_batch_size=4096) diff --git a/configs/wrn/wide-resnet101_8xb32_in1k.py b/configs/wrn/wide-resnet101_8xb32_in1k.py index d1bf5e5e..34d02f33 100644 --- a/configs/wrn/wide-resnet101_8xb32_in1k.py +++ b/configs/wrn/wide-resnet101_8xb32_in1k.py @@ -5,3 +5,8 @@ _base_ = [ ] model = dict(backbone=dict(depth=101)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/wrn/wide-resnet50_8xb32_in1k.py b/configs/wrn/wide-resnet50_8xb32_in1k.py index edf6a051..0de276ba 100644 --- a/configs/wrn/wide-resnet50_8xb32_in1k.py +++ b/configs/wrn/wide-resnet50_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_resize.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/configs/wrn/wide-resnet50_timm_8xb32_in1k.py b/configs/wrn/wide-resnet50_timm_8xb32_in1k.py index 8dca8f37..393ec168 100644 --- a/configs/wrn/wide-resnet50_timm_8xb32_in1k.py +++ b/configs/wrn/wide-resnet50_timm_8xb32_in1k.py @@ -3,3 +3,8 @@ _base_ = [ '../_base_/datasets/imagenet_bs32_pil_bicubic.py', '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' ] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256)