Refactor scheduler configuration

This commit is contained in:
Ezra-Yu 2022-05-23 09:31:57 +00:00 committed by mzr1996
parent 1f2f0dae5e
commit 58d9f649ed
42 changed files with 371 additions and 218 deletions

View File

@ -1,5 +1,10 @@
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
# learning policy
lr_config = dict(policy='step', step=[100, 150])
runner = dict(type='EpochBasedRunner', max_epochs=200)
param_scheduler = dict(
type='MultiStepLR', by_epoch=True, milestones=[100, 150], gamma=0.1)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=200)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -2,12 +2,12 @@
optimizer = dict(
type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005, nesterov=True)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=5,
warmup_ratio=0.01,
warmup_by_epoch=True)
param_scheduler = [
dict(type='LinearLR', start_factor=0.01, by_epoch=True, begin=0, end=5),
dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
]
runner = dict(type='EpochBasedRunner', max_epochs=100)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -16,13 +16,23 @@ optimizer = dict(
paramwise_cfg=paramwise_cfg)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
by_epoch=False,
min_lr_ratio=1e-2,
warmup='linear',
warmup_ratio=1e-3,
warmup_iters=5 * 1252,
warmup_by_epoch=False)
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=False,
begin=0,
end=5 * 1252),
dict(
type='CosineAnnealingLR',
T_max=295,
eta_min=1e-2,
by_epoch=True,
begin=5,
end=300)
]
runner = dict(type='EpochBasedRunner', max_epochs=300)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -17,13 +17,18 @@ optimizer = dict(
paramwise_cfg=paramwise_cfg)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
by_epoch=False,
min_lr_ratio=1e-2,
warmup='linear',
warmup_ratio=1e-3,
warmup_iters=20,
warmup_by_epoch=True)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-3, by_epoch=False, begin=0, end=20),
dict(
type='CosineAnnealingLR',
T_max=280,
eta_min=1e-2,
by_epoch=True,
begin=10,
end=300)
]
runner = dict(type='EpochBasedRunner', max_epochs=300)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -1,11 +1,12 @@
# optimizer
optimizer = dict(type='SGD', lr=0.8, momentum=0.9, weight_decay=5e-5)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=5,
warmup_ratio=0.1,
warmup_by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=100)
param_scheduler = [
dict(type='LinearLR', start_factor=0.1, by_epoch=True, begin=0, end=5),
dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -6,11 +6,21 @@ optimizer = dict(
weight_decay=0.00004,
paramwise_cfg=dict(norm_decay_mult=0))
# learning policy
lr_config = dict(
policy='poly',
min_lr=0,
by_epoch=False,
warmup='constant',
warmup_iters=5000,
)
runner = dict(type='EpochBasedRunner', max_epochs=300)
param_scheduler = [
dict(type='ConstantLR', factor=0.1, by_epoch=False, begin=0, end=5 * 1252),
dict(type='PolyLR', eta_min=0, by_epoch=True, begin=5, end=300)
]
# old learning policy
# lr_config = dict(
# policy='poly',
# min_lr=0,
# by_epoch=False,
# warmup='constant',
# warmup_iters=5000,
# )
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -3,10 +3,14 @@ optimizer = dict(
type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=2500,
warmup_ratio=0.25,
step=[30, 60, 90])
runner = dict(type='EpochBasedRunner', max_epochs=100)
param_scheduler = [
dict(
type='LinearLR', start_factor=0.25, by_epoch=False, begin=0, end=2500),
dict(
type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -10,10 +10,30 @@ paramwise_cfg = dict(
'.backbone.pos_embed': dict(decay_mult=0.0)
})
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=10000,
warmup_ratio=1e-4)
runner = dict(type='EpochBasedRunner', max_epochs=300)
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=False,
begin=0,
end=10 * 626),
dict(
type='CosineAnnealingLR',
T_max=290,
eta_min=1e-2,
by_epoch=True,
begin=10,
end=300)
]
# old learning policy
# lr_config = dict(
# policy='CosineAnnealing',
# min_lr=0,
# warmup='linear',
# warmup_iters=10000,
# warmup_ratio=1e-4)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -2,10 +2,24 @@
optimizer = dict(
type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=2500,
warmup_ratio=0.25)
runner = dict(type='EpochBasedRunner', max_epochs=100)
param_scheduler = [
dict(
type='LinearLR',
start_factor=0.25,
by_epoch=False,
begin=0,
end=5 * 626),
dict(type='CosineAnnealingLR', T_max=95, by_epoch=True, begin=5, end=100)
]
# old learning policy
# lr_config = dict(
# policy='CosineAnnealing',
# min_lr=0,
# warmup='linear',
# warmup_iters=2500,
# warmup_ratio=0.25)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -2,11 +2,23 @@
optimizer = dict(type='Lamb', lr=0.005, weight_decay=0.02)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=1.0e-6,
warmup='linear',
# For ImageNet-1k, 626 iters per epoch, warmup 5 epochs.
warmup_iters=5 * 626,
warmup_ratio=0.0001)
runner = dict(type='EpochBasedRunner', max_epochs=100)
param_scheduler = [
dict(
type='LinearLR',
start_factor=0.0001,
by_epoch=False,
begin=0,
end=5 * 626),
dict(
type='CosineAnnealingLR',
T_max=95,
eta_min=1.0e-6,
by_epoch=True,
begin=5,
end=100)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -1,5 +1,10 @@
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
# learning policy
lr_config = dict(policy='step', step=[30, 60, 90])
runner = dict(type='EpochBasedRunner', max_epochs=100)
param_scheduler = dict(
type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -1,6 +1,10 @@
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
# learning policy
lr_config = dict(policy='step', step=[40, 80, 120])
param_scheduler = dict(
type='MultiStepLR', by_epoch=True, milestones=[40, 80, 120], gamma=0.1)
runner = dict(type='EpochBasedRunner', max_epochs=140)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=140)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -1,10 +1,14 @@
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=25025,
warmup_ratio=0.25)
runner = dict(type='EpochBasedRunner', max_epochs=200)
param_scheduler = [
dict(
type='LinearLR', start_factor=0.25, by_epoch=False, begin=0,
end=25025),
dict(type='CosineAnnealingLR', T_max=195, by_epoch=True, begin=5, end=200)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=200)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -1,6 +1,10 @@
# optimizer
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0)
param_scheduler = dict(
type='CosineAnnealingLR', T_max=100, by_epoch=True, begin=0, end=100)
runner = dict(type='EpochBasedRunner', max_epochs=100)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -1,5 +1,9 @@
# optimizer
optimizer = dict(type='SGD', lr=0.045, momentum=0.9, weight_decay=0.00004)
# learning policy
lr_config = dict(policy='step', gamma=0.98, step=1)
runner = dict(type='EpochBasedRunner', max_epochs=300)
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=1, gamma=0.98)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -13,11 +13,26 @@ optimizer = dict(
)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=10000,
warmup_ratio=1e-4,
)
runner = dict(type='EpochBasedRunner', max_epochs=300)
param_scheduler = [
dict(
type='LinearLR',
start_factor=1e-4,
by_epoch=False,
begin=0,
end=30 * 308),
dict(
type='CosineAnnealingLR', T_max=270, by_epoch=True, begin=30, end=300)
]
# old learning policy
# lr_config = dict(
# policy='CosineAnnealing',
# min_lr=0,
# warmup='linear',
# warmup_iters=10000,
# warmup_ratio=1e-4,
# )
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -9,4 +9,4 @@ default_hooks = dict(optimizer=dict(grad_clip=dict(max_norm=5.0)))
optimizer = dict(lr=0.01)
runner = dict(type='EpochBasedRunner', max_epochs=150)
train_cfg = dict(by_epoch=True, max_epochs=150)

View File

@ -9,4 +9,4 @@ default_hooks = dict(optimizer=dict(grad_clip=dict(max_norm=5.0)))
optimizer = dict(lr=0.01)
runner = dict(type='EpochBasedRunner', max_epochs=150)
train_cfg = dict(by_epoch=True, max_epochs=150)

View File

@ -9,4 +9,4 @@ default_hooks = dict(optimizer=dict(grad_clip=dict(max_norm=5.0)))
optimizer = dict(lr=0.01)
runner = dict(type='EpochBasedRunner', max_epochs=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -6,7 +6,7 @@
## Abstract
Recent work has shown that convolutional networks can be substantially deeper, more accurate, and efficient to train if they contain shorter connections between layers close to the input and those close to the output. In this paper, we embrace this observation and introduce the Dense Convolutional Network (DenseNet), which connects each layer to every other layer in a feed-forward fashion. Whereas traditional convolutional networks with L layers have L connections - one between each layer and its subsequent layer - our network has L(L+1)/2 direct connections. For each layer, the feature-maps of all preceding layers are used as inputs, and its own feature-maps are used as inputs into all subsequent layers. DenseNets have several compelling advantages: they alleviate the vanishing-gradient problem, strengthen feature propagation, encourage feature reuse, and substantially reduce the number of parameters. We evaluate our proposed architecture on four highly competitive object recognition benchmark tasks (CIFAR-10, CIFAR-100, SVHN, and ImageNet). DenseNets obtain significant improvements over the state-of-the-art on most of them, whilst requiring less computation to achieve high performance.
Recent work has shown that convolutional networks can be substantially deeper, more accurate, and efficient to train if they contain shorter connections between layers close to the input and those close to the output. In this paper, we embrace this observation and introduce the Dense Convolutional Network (DenseNet), which connects each layer to every other layer in a feed-forward fashion. Whereas traditional convolutional networks with L layers have L connections - one between each layer and its subsequent layer - our network has L(L+1)/2 direct connections. For each layer, the feature-maps of all preceding layers are used as inputs, and its own feature-maps are used as inputs into all subsequent layers. DenseNets have several compelling advantages: they alleviate the vanishing-gradient problem, strengthen feature propagation, encourage feature reuse, and substantially reduce the number of parameters. We evaluate our proposed architecture on four highly competitive object recognition benchmark tasks (CIFAR-10, CIFAR-100, SVHN, and ImageNet). DenseNets obtain significant improvements over the state-of-the-art on most of them, whilst requiring less computation to achieve high performance.
<div align=center>
<img src="https://user-images.githubusercontent.com/42952108/162675098-9a670883-b13a-4a5a-a9c9-06c39c616a0a.png" width="100%"/>

View File

@ -7,4 +7,4 @@ _base_ = [
data = dict(samples_per_gpu=256)
runner = dict(type='EpochBasedRunner', max_epochs=90)
train_cfg = dict(by_epoch=True, max_epochs=90)

View File

@ -7,4 +7,4 @@ _base_ = [
data = dict(samples_per_gpu=256)
runner = dict(type='EpochBasedRunner', max_epochs=90)
train_cfg = dict(by_epoch=True, max_epochs=90)

View File

@ -7,4 +7,4 @@ _base_ = [
data = dict(samples_per_gpu=256)
runner = dict(type='EpochBasedRunner', max_epochs=90)
train_cfg = dict(by_epoch=True, max_epochs=90)

View File

@ -7,4 +7,4 @@ _base_ = [
data = dict(samples_per_gpu=256)
runner = dict(type='EpochBasedRunner', max_epochs=90)
train_cfg = dict(by_epoch=True, max_epochs=90)

View File

@ -38,7 +38,8 @@ evaluation = dict(
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[15])
param_scheduler = dict(
type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
# checkpoint saving
checkpoint_config = dict(interval=1)
# yapf:disable
@ -49,8 +50,13 @@ log_config = dict(
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=5)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=5)
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/mnist/'

View File

@ -154,5 +154,8 @@ optimizer = dict(
weight_decay=1e-5)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=2, gamma=0.973, by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=600)
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -4,5 +4,6 @@ _base_ = [
'../_base_/schedules/cifar10_bs128.py', '../_base_/default_runtime.py'
]
lr_config = dict(policy='step', step=[120, 170])
runner = dict(type='EpochBasedRunner', max_epochs=200)
param_scheduler = dict(
type='MultiStepLR', by_epoch=True, milestones=[120, 170], gamma=0.1)
train_cfg = dict(by_epoch=True, max_epochs=200)

View File

@ -154,5 +154,8 @@ optimizer = dict(
weight_decay=1e-5)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=2, gamma=0.973, by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=600)
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=2, gamma=0.973)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -171,11 +171,11 @@ optimizer = dict(
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=5,
warmup_ratio=1e-6,
warmup_by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=270)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-6, by_epoch=True, begin=0, end=5),
dict(type='CosineAnnealingLR', T_max=265, by_epoch=True, begin=5, end=270)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=270)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -171,11 +171,12 @@ optimizer = dict(
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=5,
warmup_ratio=1e-6,
warmup_by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=270)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-6, by_epoch=True, begin=0, end=5),
dict(type='CosineAnnealingLR', T_max=265, by_epoch=True, begin=5, end=270)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=270)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -171,11 +171,11 @@ optimizer = dict(
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=5,
warmup_ratio=1e-6,
warmup_by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=270)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-6, by_epoch=True, begin=0, end=5),
dict(type='CosineAnnealingLR', T_max=265, by_epoch=True, begin=5, end=270)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=270)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -171,11 +171,11 @@ optimizer = dict(
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=5,
warmup_ratio=1e-6,
warmup_by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=270)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-6, by_epoch=True, begin=0, end=5),
dict(type='CosineAnnealingLR', T_max=265, by_epoch=True, begin=5, end=270)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=270)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -8,4 +8,4 @@ _base_ = [
model = dict(head=dict(num_classes=21843))
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=90)
train_cfg = dict(by_epoch=True, max_epochs=90)

View File

@ -7,4 +7,5 @@ _base_ = [
model = dict(head=dict(num_classes=100))
optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005)
lr_config = dict(policy='step', step=[60, 120, 160], gamma=0.2)
param_scheduler = dict(
type='MultiStepLR', by_epoch=False, milestones=[60, 120, 160], gamma=0.2)

View File

@ -19,17 +19,20 @@ optimizer = dict(
optimizer_config = dict(grad_clip=None)
# learning policy
# FIXME: lr in the first 300 epochs conforms to the CosineAnnealing and
# the lr in the last 10 epoch equals to min_lr
lr_config = dict(
policy='CosineAnnealingCooldown',
min_lr=1e-5,
cool_down_time=10,
cool_down_ratio=0.1,
by_epoch=True,
warmup_by_epoch=True,
warmup='linear',
warmup_iters=10,
warmup_ratio=1e-6)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-6, by_epoch=True, begin=0, end=10),
dict(
type='CosineAnnealingLR',
T_max=290,
eta_min=1e-5,
by_epoch=True,
begin=10,
end=300),
dict(type='ConstantLR', factor=0.1, by_epoch=True, begin=300, end=310),
]
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
runner = dict(type='EpochBasedRunner', max_epochs=310)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=310)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -19,17 +19,20 @@ optimizer = dict(
optimizer_config = dict(grad_clip=None)
# learning policy
# FIXME: lr in the first 300 epochs conforms to the CosineAnnealing and
# the lr in the last 10 epoch equals to min_lr
lr_config = dict(
policy='CosineAnnealingCooldown',
min_lr=1e-5,
cool_down_time=10,
cool_down_ratio=0.1,
by_epoch=True,
warmup_by_epoch=True,
warmup='linear',
warmup_iters=10,
warmup_ratio=1e-6)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-6, by_epoch=True, begin=0, end=10),
dict(
type='CosineAnnealingLR',
T_max=290,
eta_min=1e-5,
by_epoch=True,
begin=10,
end=300),
dict(type='ConstantLR', factor=0.1, by_epoch=True, begin=300, end=310),
]
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
runner = dict(type='EpochBasedRunner', max_epochs=310)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=310)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -19,17 +19,20 @@ optimizer = dict(
optimizer_config = dict(grad_clip=None)
# learning policy
# FIXME: lr in the first 300 epochs conforms to the CosineAnnealing and
# the lr in the last 10 epoch equals to min_lr
lr_config = dict(
policy='CosineAnnealingCooldown',
min_lr=1e-5,
cool_down_time=10,
cool_down_ratio=0.1,
by_epoch=True,
warmup_by_epoch=True,
warmup='linear',
warmup_iters=10,
warmup_ratio=1e-6)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-6, by_epoch=True, begin=0, end=10),
dict(
type='CosineAnnealingLR',
T_max=290,
eta_min=1e-5,
by_epoch=True,
begin=10,
end=300),
dict(type='ConstantLR', factor=0.1, by_epoch=True, begin=300, end=310),
]
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
runner = dict(type='EpochBasedRunner', max_epochs=310)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=310)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -30,11 +30,13 @@ data = dict(
optimizer = dict(type='AdamW', lr=1e-3, weight_decay=0.05)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup_by_epoch=True,
warmup='linear',
warmup_iters=5,
warmup_ratio=1e-3)
runner = dict(type='EpochBasedRunner', max_epochs=300)
# learning policy
param_scheduler = [
dict(type='LinearLR', start_factor=1e-3, by_epoch=True, begin=0, end=5),
dict(type='CosineAnnealingLR', T_max=295, by_epoch=True, begin=5, end=300)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -23,13 +23,15 @@ optimizer = dict(
paramwise_cfg=paramwise_cfg)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
by_epoch=True,
min_lr_ratio=1e-2,
warmup='linear',
warmup_ratio=1e-3,
warmup_iters=5,
warmup_by_epoch=True)
param_scheduler = [
dict(type='LinearLR', start_factor=1e-3, by_epoch=True, begin=0, end=5),
dict(
type='CosineAnnealingLR',
T_max=295,
eta_min=1e-2,
by_epoch=True,
begin=5,
end=300)
]
evaluation = dict(interval=1, metric='accuracy')

View File

@ -22,13 +22,16 @@ optimizer = dict(
betas=(0.9, 0.999),
paramwise_cfg=paramwise_cfg)
lr_config = dict(
policy='CosineAnnealing',
by_epoch=True,
min_lr_ratio=1e-2,
warmup='linear',
warmup_ratio=1e-3,
warmup_iters=5,
warmup_by_epoch=True)
# learning policy
param_scheduler = [
dict(type='LinearLR', start_factor=1e-3, by_epoch=True, begin=0, end=5),
dict(
type='CosineAnnealingLR',
T_max=295,
eta_min=1e-2,
by_epoch=True,
begin=5,
end=300)
]
evaluation = dict(interval=1, metric='accuracy')

View File

@ -21,6 +21,9 @@ optimizer = dict(
paramwise_cfg=dict(custom_keys={'.backbone.classifier': dict(lr_mult=10)}))
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=20, gamma=0.1)
param_scheduler = dict(type='StepLR', by_epoch=True, step_size=20, gamma=0.1)
runner = dict(type='EpochBasedRunner', max_epochs=40)
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=40)
val_cfg = dict(interval=1) # validate every epoch
test_cfg = dict()

View File

@ -72,13 +72,15 @@ optimizer = dict(
)
# learning policy
lr_config = dict(
policy='CosineAnnealing',
min_lr=0,
warmup='linear',
warmup_iters=800,
warmup_ratio=0.02,
)
param_scheduler = [
dict(type='LinearLR', start_factor=0.02, by_epoch=False, begin=0, end=800),
dict(
type='CosineAnnealingLR',
T_max=4200,
by_epoch=False,
begin=800,
end=5000)
]
# ipu cfg
# model partition config