[Refactor] add auto_scale_lr

2022-07-14 19:15:49 +08:00 · 2022-07-14 19:15:49 +08:00 · ce81a07059
parent f37dc44a25
commit ce81a07059
183 changed files with 915 additions and 0 deletions
--- a/configs/conformer/conformer-base-p16_8xb128_in1k.py
+++ b/configs/conformer/conformer-base-p16_8xb128_in1k.py
@ -6,3 +6,8 @@ _base_ = [
 ]

 train_dataloader = dict(batch_size=128)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/conformer/conformer-small-p16_8xb128_in1k.py
+++ b/configs/conformer/conformer-small-p16_8xb128_in1k.py
@ -6,3 +6,8 @@ _base_ = [
 ]

 train_dataloader = dict(batch_size=128)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/conformer/conformer-small-p32_8xb128_in1k.py
+++ b/configs/conformer/conformer-small-p32_8xb128_in1k.py
@ -6,3 +6,8 @@ _base_ = [
 ]

 train_dataloader = dict(batch_size=128)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/conformer/conformer-tiny-p16_8xb128_in1k.py
+++ b/configs/conformer/conformer-tiny-p16_8xb128_in1k.py
@ -6,3 +6,8 @@ _base_ = [
 ]

 train_dataloader = dict(batch_size=128)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/convmixer/convmixer-1024-20_10xb64_in1k.py
+++ b/configs/convmixer/convmixer-1024-20_10xb64_in1k.py
@ -32,3 +32,8 @@ param_scheduler = [
 ]

 train_cfg = dict(by_epoch=True, max_epochs=150)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (10 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=640)
--- a/configs/convmixer/convmixer-1536-20_10xb64_in1k.py
+++ b/configs/convmixer/convmixer-1536-20_10xb64_in1k.py
@ -32,3 +32,8 @@ param_scheduler = [
 ]

 train_cfg = dict(by_epoch=True, max_epochs=150)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (10 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=640)
--- a/configs/convmixer/convmixer-768-32_10xb64_in1k.py
+++ b/configs/convmixer/convmixer-768-32_10xb64_in1k.py
@ -12,3 +12,8 @@ optim_wrapper = dict(
 )

 train_cfg = dict(by_epoch=True, max_epochs=300)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (10 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=640)
--- a/configs/convnext/convnext-base_32xb128_in1k.py
+++ b/configs/convnext/convnext-base_32xb128_in1k.py
@ -16,3 +16,8 @@ optim_wrapper = dict(

 # runtime setting
 custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/convnext/convnext-large_64xb64_in1k.py
+++ b/configs/convnext/convnext-large_64xb64_in1k.py
@ -16,3 +16,8 @@ optim_wrapper = dict(

 # runtime setting
 custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (64 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/convnext/convnext-small_32xb128_in1k.py
+++ b/configs/convnext/convnext-small_32xb128_in1k.py
@ -16,3 +16,8 @@ optim_wrapper = dict(

 # runtime setting
 custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/convnext/convnext-tiny_32xb128_in1k.py
+++ b/configs/convnext/convnext-tiny_32xb128_in1k.py
@ -16,3 +16,8 @@ optim_wrapper = dict(

 # runtime setting
 custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/convnext/convnext-xlarge_64xb64_in1k.py
+++ b/configs/convnext/convnext-xlarge_64xb64_in1k.py
@ -16,3 +16,8 @@ optim_wrapper = dict(

 # runtime setting
 custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (64 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/cspnet/cspdarknet50_8xb32_in1k.py
+++ b/configs/cspnet/cspdarknet50_8xb32_in1k.py
@ -43,3 +43,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/cspnet/cspresnet50_8xb32_in1k.py
+++ b/configs/cspnet/cspresnet50_8xb32_in1k.py
@ -43,3 +43,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/cspnet/cspresnext50_8xb32_in1k.py
+++ b/configs/cspnet/cspresnext50_8xb32_in1k.py
@ -43,3 +43,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py
+++ b/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py
@ -7,3 +7,8 @@ model = dict(
    # Change to the path of the pretrained model
    # init_cfg=dict(type='Pretrained', checkpoint=''),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (16 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/deit/deit-base-distilled_pt-16xb64_in1k.py
+++ b/configs/deit/deit-base-distilled_pt-16xb64_in1k.py
@ -8,3 +8,8 @@ model = dict(

 # dataset settings
 train_dataloader = dict(batch_size=64)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (16 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/deit/deit-base_ft-16xb32_in1k-384px.py
+++ b/configs/deit/deit-base_ft-16xb32_in1k-384px.py
@ -30,3 +30,8 @@ train_dataloader = dict(batch_size=32)

 # schedule settings
 optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (16 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/deit/deit-base_pt-16xb64_in1k.py
+++ b/configs/deit/deit-base_pt-16xb64_in1k.py
@ -12,3 +12,8 @@ train_dataloader = dict(batch_size=64)

 # runtime settings
 custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (16 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/deit/deit-small-distilled_pt-4xb256_in1k.py
+++ b/configs/deit/deit-small-distilled_pt-4xb256_in1k.py
@ -5,3 +5,8 @@ model = dict(
    backbone=dict(type='DistilledVisionTransformer', arch='deit-small'),
    head=dict(type='DeiTClsHead', in_channels=384),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/deit/deit-small_pt-4xb256_in1k.py
+++ b/configs/deit/deit-small_pt-4xb256_in1k.py
@ -46,3 +46,8 @@ optim_wrapper = dict(
        }),
    clip_grad=dict(max_norm=5.0),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py
+++ b/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py
@ -5,3 +5,8 @@ model = dict(
    backbone=dict(type='DistilledVisionTransformer', arch='deit-tiny'),
    head=dict(type='DeiTClsHead', in_channels=192),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/deit/deit-tiny_pt-4xb256_in1k.py
+++ b/configs/deit/deit-tiny_pt-4xb256_in1k.py
@ -5,3 +5,8 @@ model = dict(
    backbone=dict(type='VisionTransformer', arch='deit-tiny'),
    head=dict(type='VisionTransformerClsHead', in_channels=192),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/densenet/densenet121_4xb256_in1k.py
+++ b/configs/densenet/densenet121_4xb256_in1k.py
@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256)

 # schedule settings
 train_cfg = dict(by_epoch=True, max_epochs=90)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/densenet/densenet161_4xb256_in1k.py
+++ b/configs/densenet/densenet161_4xb256_in1k.py
@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256)

 # schedule settings
 train_cfg = dict(by_epoch=True, max_epochs=90)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/densenet/densenet169_4xb256_in1k.py
+++ b/configs/densenet/densenet169_4xb256_in1k.py
@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256)

 # schedule settings
 train_cfg = dict(by_epoch=True, max_epochs=90)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/densenet/densenet201_4xb256_in1k.py
+++ b/configs/densenet/densenet201_4xb256_in1k.py
@ -10,3 +10,8 @@ train_dataloader = dict(batch_size=256)

 # schedule settings
 train_cfg = dict(by_epoch=True, max_epochs=90)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (256 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b0_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b0_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b1_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b1_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b2_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b2_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b3_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b3_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b4_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b4_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b5_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b5_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b6_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b6_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b7_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b7_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-b8_8xb32_in1k.py
+++ b/configs/efficientnet/efficientnet-b8_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-em_8xb32-01norm_in1k.py
@ -29,3 +29,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py
+++ b/configs/efficientnet/efficientnet-es_8xb32-01norm_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/hrnet/hrnet-w18_4xb32_in1k.py
+++ b/configs/hrnet/hrnet-w18_4xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_coslr.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/hrnet/hrnet-w30_4xb32_in1k.py
+++ b/configs/hrnet/hrnet-w30_4xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_coslr.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/hrnet/hrnet-w32_4xb32_in1k.py
+++ b/configs/hrnet/hrnet-w32_4xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_coslr.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/hrnet/hrnet-w40_4xb32_in1k.py
+++ b/configs/hrnet/hrnet-w40_4xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_coslr.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/hrnet/hrnet-w44_4xb32_in1k.py
+++ b/configs/hrnet/hrnet-w44_4xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_coslr.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/hrnet/hrnet-w48_4xb32_in1k.py
+++ b/configs/hrnet/hrnet-w48_4xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_coslr.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/hrnet/hrnet-w64_4xb32_in1k.py
+++ b/configs/hrnet/hrnet-w64_4xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_coslr.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/inception_v3/inception-v3_8xb32_in1k.py
+++ b/configs/inception_v3/inception-v3_8xb32_in1k.py
@ -22,3 +22,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/lenet/lenet5_mnist.py
+++ b/configs/lenet/lenet5_mnist.py
@ -84,3 +84,8 @@ load_from = None

 # whether to resume the training of the checkpoint
 resume_from = None
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR; do not change it.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+# TODO(review): confirm the 8 x 128 setup; the file name has no NxbM tag.
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py
+++ b/configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py
@ -6,3 +6,8 @@ _base_ = [
 ]

 optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (64 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py
+++ b/configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py
@ -6,3 +6,8 @@ _base_ = [
 ]

 optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (64 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py
+++ b/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py
@ -4,3 +4,8 @@ _base_ = [
    '../_base_/schedules/imagenet_bs256_epochstep.py',
    '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/mobilenet_v3/mobilenet-v3-large_8xb32_in1k.py
+++ b/configs/mobilenet_v3/mobilenet-v3-large_8xb32_in1k.py
@ -21,3 +21,8 @@ param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
 train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
 val_cfg = dict()
 test_cfg = dict()
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/mobilenet_v3/mobilenet-v3-small_8xb16_cifar10.py
+++ b/configs/mobilenet_v3/mobilenet-v3-small_8xb16_cifar10.py
@ -13,3 +13,8 @@ param_scheduler = dict(
 )

 train_cfg = dict(by_epoch=True, max_epochs=200)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (16 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/mobilenet_v3/mobilenet-v3-small_8xb32_in1k.py
+++ b/configs/mobilenet_v3/mobilenet-v3-small_8xb32_in1k.py
@ -21,3 +21,8 @@ param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
 train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
 val_cfg = dict()
 test_cfg = dict()
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/poolformer/poolformer-m36_32xb128_in1k.py
+++ b/configs/poolformer/poolformer-m36_32xb128_in1k.py
@ -10,3 +10,8 @@ optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/poolformer/poolformer-m48_32xb128_in1k.py
+++ b/configs/poolformer/poolformer-m48_32xb128_in1k.py
@ -10,3 +10,8 @@ optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/poolformer/poolformer-s12_32xb128_in1k.py
+++ b/configs/poolformer/poolformer-s12_32xb128_in1k.py
@ -10,3 +10,8 @@ optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/poolformer/poolformer-s24_32xb128_in1k.py
+++ b/configs/poolformer/poolformer-s24_32xb128_in1k.py
@ -10,3 +10,8 @@ optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/poolformer/poolformer-s36_32xb128_in1k.py
+++ b/configs/poolformer/poolformer-s36_32xb128_in1k.py
@ -10,3 +10,8 @@ optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=dict(max_norm=5.0),
 )
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
--- a/configs/regnet/regnetx-1.6gf_8xb128_in1k.py
+++ b/configs/regnet/regnetx-1.6gf_8xb128_in1k.py
@ -4,3 +4,8 @@ _base_ = ['./regnetx-400mf_8xb128_in1k.py']
 model = dict(
    backbone=dict(type='RegNet', arch='regnetx_1.6gf'),
    head=dict(in_channels=912, ))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/regnet/regnetx-12gf_8xb64_in1k.py
+++ b/configs/regnet/regnetx-12gf_8xb64_in1k.py
@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64)
 # schedule settings
 # for batch_size 512, use lr = 0.4
 optim_wrapper = dict(optimizer=dict(lr=0.4))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/regnet/regnetx-3.2gf_8xb64_in1k.py
+++ b/configs/regnet/regnetx-3.2gf_8xb64_in1k.py
@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64)
 # schedule settings
 # for batch_size 512, use lr = 0.4
 optim_wrapper = dict(optimizer=dict(lr=0.4))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/regnet/regnetx-4.0gf_8xb64_in1k.py
+++ b/configs/regnet/regnetx-4.0gf_8xb64_in1k.py
@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64)
 # schedule settings
 # for batch_size 512, use lr = 0.4
 optim_wrapper = dict(optimizer=dict(lr=0.4))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/regnet/regnetx-400mf_8xb128_in1k.py
+++ b/configs/regnet/regnetx-400mf_8xb128_in1k.py
@ -53,3 +53,8 @@ custom_hooks = [
        interval=1,
        priority='ABOVE_NORMAL')
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/regnet/regnetx-6.4gf_8xb64_in1k.py
+++ b/configs/regnet/regnetx-6.4gf_8xb64_in1k.py
@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64)
 # schedule settings
 # for batch_size 512, use lr = 0.4
 optim_wrapper = dict(optimizer=dict(lr=0.4))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/regnet/regnetx-8.0gf_8xb64_in1k.py
+++ b/configs/regnet/regnetx-8.0gf_8xb64_in1k.py
@ -11,3 +11,8 @@ train_dataloader = dict(batch_size=64)
 # schedule settings
 # for batch_size 512, use lr = 0.4
 optim_wrapper = dict(optimizer=dict(lr=0.4))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/regnet/regnetx-800mf_8xb128_in1k.py
+++ b/configs/regnet/regnetx-800mf_8xb128_in1k.py
@ -4,3 +4,8 @@ _base_ = ['./regnetx-400mf_8xb128_in1k.py']
 model = dict(
    backbone=dict(type='RegNet', arch='regnetx_800mf'),
    head=dict(in_channels=672, ))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=1024)
--- a/configs/repmlp/repmlp-base_8xb64_in1k-256px.py
+++ b/configs/repmlp/repmlp-base_8xb64_in1k-256px.py
@ -29,3 +29,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

 # schedule settings
 optim_wrapper = dict(clip_grad=dict(max_norm=1.0))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/repmlp/repmlp-base_8xb64_in1k.py
+++ b/configs/repmlp/repmlp-base_8xb64_in1k.py
@ -19,3 +19,8 @@ test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

 # schedule settings
 optim_wrapper = dict(clip_grad=dict(max_norm=5.0))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/repmlp/repmlp-base_delopy_8xb64_in1k.py
+++ b/configs/repmlp/repmlp-base_delopy_8xb64_in1k.py
@ -1,3 +1,8 @@
 _base_ = ['./repmlp-base_8xb64_in1k.py']

 model = dict(backbone=dict(deploy=True))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/repmlp/repmlp-base_deploy_8xb64_in1k-256px.py
+++ b/configs/repmlp/repmlp-base_deploy_8xb64_in1k-256px.py
@ -1,3 +1,8 @@
 _base_ = ['./repmlp-base_8xb64_in1k-256px.py']

 model = dict(backbone=dict(deploy=True))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=512)
--- a/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py
@ -10,3 +10,8 @@ param_scheduler = dict(
    type='CosineAnnealingLR', T_max=120, by_epoch=True, begin=0, end=120)

 train_cfg = dict(by_epoch=True, max_epochs=120)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-A1_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-A1_4xb64-coslr-120e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py'

 model = dict(backbone=dict(arch='A1'))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-A2_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-A2_4xb64-coslr-120e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py'

 model = dict(backbone=dict(arch='A2'), head=dict(in_channels=1408))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B0_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-B0_4xb64-coslr-120e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py'

 model = dict(backbone=dict(arch='B0'), head=dict(in_channels=1280))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B1_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-B1_4xb64-coslr-120e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py'

 model = dict(backbone=dict(arch='B1'), head=dict(in_channels=2048))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B1g2_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-B1g2_4xb64-coslr-120e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py'

 model = dict(backbone=dict(arch='B1g2'), head=dict(in_channels=2048))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B1g4_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-B1g4_4xb64-coslr-120e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py'

 model = dict(backbone=dict(arch='B1g4'), head=dict(in_channels=2048))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B2_4xb64-coslr-120e_in1k.py
+++ b/configs/repvgg/repvgg-B2_4xb64-coslr-120e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-A0_4xb64-coslr-120e_in1k.py'

 model = dict(backbone=dict(arch='B2'), head=dict(in_channels=2560))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
+++ b/configs/repvgg/repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py'

 model = dict(backbone=dict(arch='B2g4'))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
+++ b/configs/repvgg/repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
@ -37,3 +37,8 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
+++ b/configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py'

 model = dict(backbone=dict(arch='B3g4'))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/repvgg/repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
+++ b/configs/repvgg/repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
@ -1,3 +1,8 @@
 _base_ = './repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py'

 model = dict(backbone=dict(arch='D2se'))
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (4 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/res2net/res2net101-w26-s4_8xb32_in1k.py
+++ b/configs/res2net/res2net101-w26-s4_8xb32_in1k.py
@ -3,3 +3,8 @@ _base_ = [
    '../_base_/datasets/imagenet_bs32_pil_resize.py',
    '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/res2net/res2net50-w14-s8_8xb32_in1k.py
+++ b/configs/res2net/res2net50-w14-s8_8xb32_in1k.py
@ -3,3 +3,8 @@ _base_ = [
    '../_base_/datasets/imagenet_bs32_pil_resize.py',
    '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/res2net/res2net50-w26-s8_8xb32_in1k.py
+++ b/configs/res2net/res2net50-w26-s8_8xb32_in1k.py
@ -3,3 +3,8 @@ _base_ = [
    '../_base_/datasets/imagenet_bs32_pil_resize.py',
    '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/configs/resnest/resnest101_32xb64_in1k.py
+++ b/configs/resnest/resnest101_32xb64_in1k.py
@ -71,3 +71,8 @@ param_scheduler = [
 ]

 train_cfg = dict(by_epoch=True, max_epochs=270)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=2048)
--- a/configs/resnest/resnest200_64xb32_in1k.py
+++ b/configs/resnest/resnest200_64xb32_in1k.py
@ -67,3 +67,8 @@ param_scheduler = [
 ]

 train_cfg = dict(by_epoch=True, max_epochs=270)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (64 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=2048)
--- a/configs/resnest/resnest269_64xb32_in1k.py
+++ b/configs/resnest/resnest269_64xb32_in1k.py
@ -71,3 +71,8 @@ param_scheduler = [
 ]

 train_cfg = dict(by_epoch=True, max_epochs=270)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (64 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=2048)
--- a/configs/resnest/resnest50_32xb64_in1k.py
+++ b/configs/resnest/resnest50_32xb64_in1k.py
@ -71,3 +71,8 @@ param_scheduler = [
 ]

 train_cfg = dict(by_epoch=True, max_epochs=270)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=2048)
--- a/configs/resnet/resnet101_8xb16_cifar10.py
+++ b/configs/resnet/resnet101_8xb16_cifar10.py
@ -3,3 +3,8 @@ _base_ = [
    '../_base_/datasets/cifar10_bs16.py',
    '../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (16 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
--- a/configs/resnet/resnet101_8xb32_in1k.py
+++ b/configs/resnet/resnet101_8xb32_in1k.py
@ -2,3 +2,8 @@ _base_ = [
    '../_base_/models/resnet101.py', '../_base_/datasets/imagenet_bs32.py',
    '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
 ]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (32 samples per GPU)
+auto_scale_lr = dict(base_batch_size=256)
--- a/Show More
+++ b/Show More