[Enhance] Support scheduling betas with MomentumScheduler. (#346)

* [Enhance] Support scheduling betas with MomentumScheduler.

* enhance ut

* test adam betas

* enhance ut

* enhance ut
RangiLyu 2022-07-05 20:37:23 +08:00 committed by GitHub
parent 2853045e96
commit a3d2916790
6 changed files with 363 additions and 134 deletions


@ -220,6 +220,17 @@ class OptimWrapper:
"""
return self.optimizer.param_groups
+@property
+def defaults(self) -> dict:
+"""A wrapper of ``Optimizer.defaults``.
+Make OptimWrapper compatible with :class:`_ParamScheduler`.
+Returns:
+    dict: The ``defaults`` of :attr:`optimizer`.
+"""
+return self.optimizer.defaults
def get_lr(self) -> Dict[str, List[float]]:
"""Get the learning rate of the optimizer.


@ -8,10 +8,37 @@ from .param_scheduler import (ConstantParamScheduler,
class MomentumSchedulerMixin:
"""A mixin class for momentum schedulers."""
"""A mixin class for momentum schedulers.
It can schedule the momentum in SGD and the beta_0 in Adam series.
"""
def __init__(self, optimizer, *args, **kwargs):
-super().__init__(optimizer, 'momentum', *args, **kwargs)
+self.use_betas = False
+if 'momentum' in optimizer.defaults:
+param_name = 'momentum'
+elif 'betas' in optimizer.defaults:
+# for Adam-series optimizers, the momentum is betas[0]
+self.use_betas = True
+param_name = 'momentum'
+for group in optimizer.param_groups:
+# set a reference momentum in the param groups for scheduling
+group[param_name] = group['betas'][0]
+else:
+raise ValueError(
+'optimizer must support momentum when using momentum scheduler'
+)
+super().__init__(optimizer, param_name, *args, **kwargs)
+def step(self):
+"""Adjusts the parameter value of each parameter group based on the
+specified schedule."""
+super().step()
+if self.use_betas:
+for group in self.optimizer.param_groups:
+_, beta_1 = group['betas']
+# update the betas with the calculated value
+group['betas'] = (group['momentum'], beta_1)
@PARAM_SCHEDULERS.register_module()
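
A short usage sketch of the new behavior, assuming the momentum schedulers exported from ``mmengine.optim``: for an Adam-style optimizer, the mixin schedules a synthetic ``momentum`` key and ``step()`` copies the scheduled value back into ``betas[0]``.

import torch
import torch.optim as optim
from mmengine.optim import StepMomentum

model = torch.nn.Linear(2, 2)
optimizer = optim.Adam(model.parameters(), lr=0.01, betas=(0.9, 0.999))
scheduler = StepMomentum(optimizer, step_size=3, gamma=0.1)
for epoch in range(6):
    # ... run one training epoch ...
    scheduler.step()
# betas[0] decays by gamma every step_size epochs (0.9 -> 0.09 -> 0.009),
# while beta_1 stays at 0.999.
print(optimizer.param_groups[0]['betas'])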


@ -1,4 +1,9 @@
# Copyright (c) OpenMMLab. All rights reserved.
+# ------------------------------------------------------------------------
+# Modified from https://github.com/pytorch/pytorch
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
import math
import warnings
import weakref


@ -33,8 +33,17 @@ class TestLRScheduler(TestCase):
tearDown() -> cleanUp()
"""
self.model = ToyModel()
-self.optimizer = optim.SGD(
-self.model.parameters(), lr=0.05, momentum=0.01, weight_decay=5e-4)
+lr = 0.05
+self.layer2_mult = 10
+self.optimizer = optim.SGD([{
+'params': self.model.conv1.parameters()
+}, {
+'params': self.model.conv2.parameters(),
+'lr': lr * self.layer2_mult,
+}],
+lr=lr,
+momentum=0.01,
+weight_decay=5e-4)
def test_base_scheduler_step(self):
with self.assertRaises(NotImplementedError):
@ -69,8 +78,7 @@ class TestLRScheduler(TestCase):
results = []
for epoch in range(5):
-for param_group in self.optimizer.param_groups:
-results.append(param_group['lr'])
+results.append(self.optimizer.param_groups[0]['lr'])
# The order should be
# train_epoch() -> save_checkpoint() -> scheduler.step().
# Break at here to simulate the checkpoint is saved before
@ -80,8 +88,7 @@ class TestLRScheduler(TestCase):
scheduler.step()
scheduler2 = ExponentialLR(self.optimizer, gamma=0.9, last_step=4)
for epoch in range(6):
-for param_group in self.optimizer.param_groups:
-results.append(param_group['lr'])
+results.append(self.optimizer.param_groups[0]['lr'])
scheduler2.step()
for epoch in range(epochs):
@ -121,7 +128,10 @@ class TestLRScheduler(TestCase):
def test_get_last_value(self):
epochs = 10
-targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = StepLR(self.optimizer, 3, gamma=0.1)
for epoch in range(epochs):
result = scheduler.get_last_value()
@ -171,7 +181,9 @@ class TestLRScheduler(TestCase):
single_targets = [0.05] * begin + [x * 0.05
for x in interpolation] + [0.05] * (
epochs - iters - begin)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearLR(
self.optimizer,
start_factor=start_factor,
@ -207,7 +219,9 @@ class TestLRScheduler(TestCase):
epochs = 10
single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
] * 3
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = StepLR(
self.optimizer, gamma=0.1, step_size=3, verbose=True)
self._test_scheduler_value(scheduler, targets, epochs)
@ -220,7 +234,9 @@ class TestLRScheduler(TestCase):
epochs = 10
single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
] * 3
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = MultiStepLR(
self.optimizer, gamma=0.1, milestones=[2, 5, 9])
self._test_scheduler_value(scheduler, targets, epochs)
@ -234,7 +250,9 @@ class TestLRScheduler(TestCase):
# lr = 0.005 if 5 <= epoch
epochs = 10
single_targets = [0.025] * 4 + [0.05] * 6
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = ConstantLR(self.optimizer, factor=1.0 / 2, end=5)
self._test_scheduler_value(scheduler, targets, epochs)
@ -260,7 +278,9 @@ class TestLRScheduler(TestCase):
]
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
epochs - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearLR(
self.optimizer, start_factor=start_factor, end=iters + 1)
self._test_scheduler_value(scheduler, targets, epochs)
@ -268,7 +288,9 @@ class TestLRScheduler(TestCase):
def test_exp_scheduler(self):
epochs = 10
single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = ExponentialLR(self.optimizer, gamma=0.9)
self._test_scheduler_value(scheduler, targets, epochs)
@ -280,7 +302,9 @@ class TestLRScheduler(TestCase):
eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
for x in range(epochs)
]
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = CosineAnnealingLR(self.optimizer, T_max=t, eta_min=eta_min)
self._test_scheduler_value(scheduler, targets, epochs)
@ -289,12 +313,17 @@ class TestLRScheduler(TestCase):
power = 0.9
min_lr = 0.001
iters = 4
-single_targets = [
+targets_layer1 = [
min_lr + (0.05 - min_lr) * (1 - i / iters)**power
for i in range(iters)
] + [min_lr] * (
epochs - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets_layer2 = [
+min_lr + (0.05 * self.layer2_mult - min_lr) *
+(1 - i / iters)**power for i in range(iters)
+] + [min_lr] * (
+epochs - iters)
+targets = [targets_layer1, targets_layer2]
scheduler = PolyLR(
self.optimizer, power=power, eta_min=min_lr, end=iters + 1)
self._test_scheduler_value(scheduler, targets, epochs=10)
@ -365,8 +394,7 @@ class TestLRScheduler(TestCase):
epoch_length = 7
single_targets = [0.05] * 2 * epoch_length + [0.005] * 2 * epoch_length
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = StepLR.build_iter_from_epoch(
self.optimizer, gamma=0.1, step_size=2, epoch_length=epoch_length)
@ -385,8 +413,7 @@ class TestLRScheduler(TestCase):
0.0005
] * 4 * epoch_length + [0.00005] * 3 * epoch_length
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = MultiStepLR.build_iter_from_epoch(
self.optimizer,
@ -403,8 +430,7 @@ class TestLRScheduler(TestCase):
single_targets = [0.025] * (5 * epoch_length -
1) + [0.05] * (5 * epoch_length + 1)
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = ConstantLR.build_iter_from_epoch(
self.optimizer, factor=1.0 / 2, end=5, epoch_length=epoch_length)
@ -422,7 +448,9 @@ class TestLRScheduler(TestCase):
]
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
epochs * epoch_length - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearLR.build_iter_from_epoch(
self.optimizer,
start_factor=start_factor,
@ -438,8 +466,7 @@ class TestLRScheduler(TestCase):
0.05 * (0.9**x) for x in range(epochs * epoch_length)
]
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = ExponentialLR.build_iter_from_epoch(
self.optimizer, gamma=0.9, epoch_length=epoch_length)
@ -456,8 +483,7 @@ class TestLRScheduler(TestCase):
for x in range(epochs * epoch_length)
]
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = CosineAnnealingLR.build_iter_from_epoch(
self.optimizer,
@ -474,15 +500,17 @@ class TestLRScheduler(TestCase):
epoch_length = 11
iters = end * epoch_length - 1
-single_targets = [
+targets_layer1 = [
min_lr + (0.05 - min_lr) * (1 - i / iters)**power
for i in range(iters)
] + [min_lr] * (
epochs - iters)
-targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
-]
+targets_layer2 = [
+min_lr + (0.05 * self.layer2_mult - min_lr) *
+(1 - i / iters)**power for i in range(iters)
+] + [min_lr] * (
+epochs - iters)
+targets = [targets_layer1, targets_layer2]
scheduler = PolyLR.build_iter_from_epoch(
self.optimizer,
power=power,
@ -496,7 +524,9 @@ class TestLRScheduler(TestCase):
epochs = 12
single_targets = [0.025, 0.03125, 0.0375, 0.04375
] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler1 = LinearLR(
self.optimizer, start_factor=1 / 2, begin=0, end=5)
scheduler2 = MultiStepLR(
@ -515,7 +545,9 @@ class TestLRScheduler(TestCase):
(1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
]
single_targets = single_targets1 + single_targets2
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler2 = CosineAnnealingLR(
self.optimizer, T_max=5, eta_min=eta_min, begin=5, end=10)
@ -526,7 +558,9 @@ class TestLRScheduler(TestCase):
epochs = 10
single_targets = [0.025, 0.03125, 0.0375, 0.004375
] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler1 = LinearLR(
self.optimizer, start_factor=1 / 2, begin=0, end=5)
scheduler2 = MultiStepLR(
@ -547,7 +581,9 @@ class TestLRScheduler(TestCase):
]
single_targets = single_targets1 + [single_targets1[-1]
] * 5 + single_targets2
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler2 = CosineAnnealingLR(
self.optimizer, T_max=5, eta_min=eta_min, begin=10, end=15)


@ -34,12 +34,38 @@ class TestMomentumScheduler(TestCase):
tearDown() -> cleanUp()
"""
self.model = ToyModel()
-self.optimizer = optim.SGD(
-self.model.parameters(), lr=0.01, momentum=0.05, weight_decay=5e-4)
+momentum = 0.05
+self.layer2_mult = 10
+self.optimizer = optim.SGD([{
+'params': self.model.conv1.parameters()
+}, {
+'params': self.model.conv2.parameters(),
+'momentum': momentum * self.layer2_mult
+}],
+lr=0.01,
+momentum=momentum,
+weight_decay=5e-4)
+self.optimizer_with_betas = optim.Adam(
+[{
+'params': self.model.conv1.parameters()
+}, {
+'params': self.model.conv2.parameters(),
+'betas': (momentum * self.layer2_mult, 0.999)
+}],
+lr=0.01,
+betas=(momentum, 0.999),
+weight_decay=5e-4)
def test_invalid_optimizer(self):
with self.assertRaisesRegex(TypeError, 'should be an Optimizer'):
StepMomentum('invalid_optimizer', step_size=1)
+with self.assertRaisesRegex(
+ValueError,
+'optimizer must support momentum when using momentum scheduler'
+):
+optimizer = optim.ASGD(
+self.model.parameters(),
+lr=0.01,
+)
+StepMomentum(optimizer, step_size=1)
def test_overwrite_optimzer_step(self):
# raise warning if the counter in optimizer.step() is overwritten
@ -66,8 +92,7 @@ class TestMomentumScheduler(TestCase):
results = []
for epoch in range(5):
-for param_group in self.optimizer.param_groups:
-results.append(param_group['momentum'])
+results.append(self.optimizer.param_groups[0]['momentum'])
# The order should be
# train_epoch() -> save_checkpoint() -> scheduler.step().
# Break at here to simulate the checkpoint is saved before
@ -78,8 +103,7 @@ class TestMomentumScheduler(TestCase):
scheduler2 = ExponentialMomentum(
self.optimizer, gamma=0.9, last_step=4)
for epoch in range(6):
-for param_group in self.optimizer.param_groups:
-results.append(param_group['momentum'])
+results.append(self.optimizer.param_groups[0]['momentum'])
scheduler2.step()
for epoch in range(epochs):
@ -119,7 +143,10 @@ class TestMomentumScheduler(TestCase):
def test_get_last_value(self):
epochs = 10
-targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+targets = [
+single_targets, [t * self.layer2_mult for t in single_targets]
+]
scheduler = StepMomentum(self.optimizer, 3, gamma=0.1)
for epoch in range(epochs):
result = scheduler.get_last_value()
@ -170,15 +197,18 @@ class TestMomentumScheduler(TestCase):
single_targets = [0.05] * begin + [x * 0.05
for x in interpolation] + [0.05] * (
epochs - iters - begin)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearMomentum(
self.optimizer,
start_factor=start_factor,
begin=begin,
end=begin + iters + 1)
-self._test_scheduler_value(scheduler, targets, epochs)
+self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
def _test_scheduler_value(self,
+optimizer,
schedulers,
targets,
epochs=10,
@ -186,8 +216,7 @@ class TestMomentumScheduler(TestCase):
if isinstance(schedulers, _ParamScheduler):
schedulers = [schedulers]
for epoch in range(epochs):
-for param_group, target in zip(self.optimizer.param_groups,
-targets):
+for param_group, target in zip(optimizer.param_groups, targets):
assert_allclose(
target[epoch],
param_group[param_name],
@ -196,6 +225,15 @@ class TestMomentumScheduler(TestCase):
param_group[param_name]),
atol=1e-5,
rtol=0)
+if 'betas' in optimizer.defaults:
+assert_allclose(
+target[epoch],
+param_group['betas'][0],
+msg='{} is wrong in epoch {}: expected {}, got {}'.
+format('betas_0', epoch, target[epoch],
+param_group['betas'][0]),
+atol=1e-5,
+rtol=0)
[scheduler.step() for scheduler in schedulers]
def test_step_scheduler(self):
@ -206,10 +244,17 @@ class TestMomentumScheduler(TestCase):
epochs = 10
single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
] * 3
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = StepMomentum(
self.optimizer, gamma=0.1, step_size=3, verbose=True)
-self._test_scheduler_value(scheduler, targets, epochs)
+self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+scheduler = StepMomentum(
+self.optimizer_with_betas, gamma=0.1, step_size=3, verbose=True)
+self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+targets, epochs)
def test_multi_step_scheduler(self):
# momentum = 0.05 if epoch < 2
@ -219,10 +264,17 @@ class TestMomentumScheduler(TestCase):
epochs = 10
single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
] * 3
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = MultiStepMomentum(
self.optimizer, gamma=0.1, milestones=[2, 5, 9])
-self._test_scheduler_value(scheduler, targets, epochs)
+self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+scheduler = MultiStepMomentum(
+self.optimizer_with_betas, gamma=0.1, milestones=[2, 5, 9])
+self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+targets, epochs)
def test_constant_scheduler(self):
# factor should be between 0 and 1
@ -233,9 +285,16 @@ class TestMomentumScheduler(TestCase):
# momentum = 0.005 if 5 <= epoch
epochs = 10
single_targets = [0.025] * 4 + [0.05] * 6
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = ConstantMomentum(self.optimizer, factor=1.0 / 2, end=5)
-self._test_scheduler_value(scheduler, targets, epochs)
+self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+scheduler = ConstantMomentum(
+self.optimizer_with_betas, factor=1.0 / 2, end=5)
+self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+targets, epochs)
def test_linear_scheduler(self):
with self.assertRaises(ValueError):
@ -259,17 +318,32 @@ class TestMomentumScheduler(TestCase):
]
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
epochs - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearMomentum(
self.optimizer, start_factor=start_factor, end=iters + 1)
-self._test_scheduler_value(scheduler, targets, epochs)
+self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+scheduler = LinearMomentum(
+self.optimizer_with_betas,
+start_factor=start_factor,
+end=iters + 1)
+self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+targets, epochs)
def test_exp_scheduler(self):
epochs = 10
single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = ExponentialMomentum(self.optimizer, gamma=0.9)
-self._test_scheduler_value(scheduler, targets, epochs)
+self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+scheduler = ExponentialMomentum(self.optimizer_with_betas, gamma=0.9)
+self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+targets, epochs)
def test_cos_anneal_scheduler(self):
epochs = 12
@ -279,25 +353,46 @@ class TestMomentumScheduler(TestCase):
eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
for x in range(epochs)
]
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = CosineAnnealingMomentum(
self.optimizer, T_max=t, eta_min=eta_min)
-self._test_scheduler_value(scheduler, targets, epochs)
+self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+scheduler = CosineAnnealingMomentum(
+self.optimizer_with_betas, T_max=t, eta_min=eta_min)
+self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+targets, epochs)
def test_poly_scheduler(self):
epochs = 10
power = 0.9
min_lr = 0.001
iters = 4
-single_targets = [
+layer1_targets = [
min_lr + (0.05 - min_lr) * (1 - i / iters)**power
for i in range(iters)
] + [min_lr] * (
epochs - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+layer2_targets = [
+min_lr + (0.05 * self.layer2_mult - min_lr) *
+(1 - i / iters)**power for i in range(iters)
+] + [min_lr] * (
+epochs - iters)
+targets = [layer1_targets, layer2_targets]
scheduler = PolyMomentum(
self.optimizer, power=power, eta_min=min_lr, end=iters + 1)
-self._test_scheduler_value(scheduler, targets, epochs=10)
+self._test_scheduler_value(
+self.optimizer, scheduler, targets, epochs=10)
+scheduler = PolyMomentum(
+self.optimizer_with_betas,
+power=power,
+eta_min=min_lr,
+end=iters + 1)
+self._test_scheduler_value(
+self.optimizer_with_betas, scheduler, targets, epochs=10)
def _check_scheduler_state_dict(self, construct, construct2, epochs=10):
scheduler = construct()
@ -359,12 +454,15 @@ class TestMomentumScheduler(TestCase):
epochs = 12
single_targets = [0.025, 0.03125, 0.0375, 0.04375
] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler1 = LinearMomentum(
self.optimizer, start_factor=1 / 2, begin=0, end=5)
scheduler2 = MultiStepMomentum(
self.optimizer, gamma=0.1, milestones=[3, 6], begin=5, end=12)
-self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+targets, epochs)
def test_multi_scheduler_without_overlap_exp_cosine(self):
# use Exp in the first 5 epochs and then use Cosine
@ -379,23 +477,29 @@ class TestMomentumScheduler(TestCase):
(1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
]
single_targets = single_targets1 + single_targets2
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler2 = CosineAnnealingMomentum(
self.optimizer, T_max=5, eta_min=eta_min, begin=5, end=10)
-self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+targets, epochs)
def test_multi_scheduler_with_overlap(self):
# use Linear at first 5 epochs together with MultiStep
epochs = 10
single_targets = [0.025, 0.03125, 0.0375, 0.004375
] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler1 = LinearMomentum(
self.optimizer, start_factor=1 / 2, begin=0, end=5)
scheduler2 = MultiStepMomentum(
self.optimizer, gamma=0.1, milestones=[3, 6, 9])
-self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+targets, epochs)
def test_multi_scheduler_with_gap(self):
# use Exp in the first 5 epochs and the last 5 epochs use Cosine
@ -412,8 +516,11 @@ class TestMomentumScheduler(TestCase):
]
single_targets = single_targets1 + [single_targets1[-1]
] * 5 + single_targets2
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler2 = CosineAnnealingMomentum(
self.optimizer, T_max=5, eta_min=eta_min, begin=10, end=15)
-self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+targets, epochs)


@ -39,8 +39,22 @@ class TestParameterScheduler(TestCase):
tearDown() -> cleanUp()
"""
self.model = ToyModel()
+self.layer2_mult = 10
+lr = 0.05
+momentum = 0.01
+weight_decay = 5e-4
self.optimizer = optim.SGD(
-self.model.parameters(), lr=0.05, momentum=0.01, weight_decay=5e-4)
+[{
+'params': self.model.conv1.parameters()
+}, {
+'params': self.model.conv2.parameters(),
+'lr': lr * self.layer2_mult,
+'momentum': momentum * self.layer2_mult,
+'weight_decay': weight_decay * self.layer2_mult
+}],
+lr=lr,
+momentum=momentum,
+weight_decay=weight_decay)
def test_base_scheduler_step(self):
with self.assertRaises(NotImplementedError):
@ -83,8 +97,7 @@ class TestParameterScheduler(TestCase):
results = []
for epoch in range(5):
-for param_group in self.optimizer.param_groups:
-results.append(param_group['lr'])
+results.append(self.optimizer.param_groups[0]['lr'])
# The order should be
# train_epoch() -> save_checkpoint() -> scheduler.step().
# Break at here to simulate the checkpoint is saved before
@ -95,8 +108,7 @@ class TestParameterScheduler(TestCase):
scheduler2 = ExponentialParamScheduler(
self.optimizer, param_name='lr', gamma=0.9, last_step=4)
for epoch in range(6):
-for param_group in self.optimizer.param_groups:
-results.append(param_group['lr'])
+results.append(self.optimizer.param_groups[0]['lr'])
scheduler2.step()
for epoch in range(epochs):
@ -141,7 +153,10 @@ class TestParameterScheduler(TestCase):
def test_get_last_value(self):
epochs = 10
-targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = StepParamScheduler(
self.optimizer, param_name='lr', step_size=3, gamma=0.1)
for epoch in range(epochs):
@ -199,7 +214,9 @@ class TestParameterScheduler(TestCase):
single_targets = [0.05] * begin + [x * 0.05
for x in interpolation] + [0.05] * (
epochs - iters - begin)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearParamScheduler(
self.optimizer,
param_name='lr',
@ -241,7 +258,9 @@ class TestParameterScheduler(TestCase):
epochs = 10
single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
] * 3
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = StepParamScheduler(
self.optimizer,
param_name='lr',
@ -254,7 +273,9 @@ class TestParameterScheduler(TestCase):
# momentum = 0.001 if 2 <= epoch < 4
epochs = 4
single_targets = [0.01] * 2 + [0.001] * 2
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = StepParamScheduler(
self.optimizer, param_name='momentum', gamma=0.1, step_size=2)
self._test_scheduler_value(
@ -268,7 +289,9 @@ class TestParameterScheduler(TestCase):
epochs = 10
single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
] * 3
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = MultiStepParamScheduler(
self.optimizer, param_name='lr', gamma=0.1, milestones=[2, 5, 9])
self._test_scheduler_value(scheduler, targets, epochs)
@ -282,7 +305,9 @@ class TestParameterScheduler(TestCase):
# lr = 0.005 if 5 <= epoch
epochs = 10
single_targets = [0.025] * 4 + [0.05] * 6
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = ConstantParamScheduler(
self.optimizer, param_name='lr', factor=1.0 / 2, end=5)
self._test_scheduler_value(scheduler, targets, epochs)
@ -313,7 +338,9 @@ class TestParameterScheduler(TestCase):
]
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
epochs - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearParamScheduler(
self.optimizer,
param_name='lr',
@ -324,7 +351,9 @@ class TestParameterScheduler(TestCase):
def test_exp_scheduler(self):
epochs = 10
single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = ExponentialParamScheduler(
self.optimizer, param_name='lr', gamma=0.9)
self._test_scheduler_value(scheduler, targets, epochs)
@ -337,7 +366,9 @@ class TestParameterScheduler(TestCase):
eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
for x in range(epochs)
]
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = CosineAnnealingParamScheduler(
self.optimizer, param_name='lr', T_max=t, eta_min=eta_min)
self._test_scheduler_value(scheduler, targets, epochs)
@ -347,12 +378,17 @@ class TestParameterScheduler(TestCase):
power = 0.9
min_lr = 0.001
iters = 4
-single_targets = [
+targets_layer1 = [
min_lr + (0.05 - min_lr) * (1 - i / iters)**power
for i in range(iters)
] + [min_lr] * (
epochs - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets_layer2 = [
+min_lr + (0.05 * self.layer2_mult - min_lr) *
+(1 - i / iters)**power for i in range(iters)
+] + [min_lr] * (
+epochs - iters)
+targets = [targets_layer1, targets_layer2]
scheduler = PolyParamScheduler(
self.optimizer,
param_name='lr',
@ -451,8 +487,7 @@ class TestParameterScheduler(TestCase):
epoch_length = 7
single_targets = [0.01] * 2 * epoch_length + [0.001] * 2 * epoch_length
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = StepParamScheduler.build_iter_from_epoch(
self.optimizer,
@ -475,8 +510,7 @@ class TestParameterScheduler(TestCase):
0.0005
] * 4 * epoch_length + [0.00005] * 3 * epoch_length
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = MultiStepParamScheduler.build_iter_from_epoch(
self.optimizer,
@ -494,8 +528,7 @@ class TestParameterScheduler(TestCase):
single_targets = [0.025] * (5 * epoch_length -
1) + [0.05] * (5 * epoch_length + 1)
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = ConstantParamScheduler.build_iter_from_epoch(
self.optimizer,
@ -517,7 +550,9 @@ class TestParameterScheduler(TestCase):
]
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
epochs * epoch_length - iters)
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler = LinearParamScheduler.build_iter_from_epoch(
self.optimizer,
param_name='lr',
@ -534,8 +569,7 @@ class TestParameterScheduler(TestCase):
0.05 * (0.9**x) for x in range(epochs * epoch_length)
]
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = ExponentialParamScheduler.build_iter_from_epoch(
self.optimizer,
@ -555,8 +589,7 @@ class TestParameterScheduler(TestCase):
for x in range(epochs * epoch_length)
]
targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
+single_targets, [x * self.layer2_mult for x in single_targets]
]
scheduler = CosineAnnealingParamScheduler.build_iter_from_epoch(
self.optimizer,
@ -574,15 +607,17 @@ class TestParameterScheduler(TestCase):
epoch_length = 11
iters = end * epoch_length - 1
-single_targets = [
+targets_layer1 = [
min_lr + (0.05 - min_lr) * (1 - i / iters)**power
for i in range(iters)
] + [min_lr] * (
epochs - iters)
-targets = [
-single_targets,
-[x * epochs * epoch_length for x in single_targets]
-]
+targets_layer2 = [
+min_lr + (0.05 * self.layer2_mult - min_lr) *
+(1 - i / iters)**power for i in range(iters)
+] + [min_lr] * (
+epochs - iters)
+targets = [targets_layer1, targets_layer2]
scheduler = PolyParamScheduler.build_iter_from_epoch(
self.optimizer,
param_name='lr',
@ -597,7 +632,9 @@ class TestParameterScheduler(TestCase):
epochs = 12
single_targets = [0.025, 0.03125, 0.0375, 0.04375
] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler1 = LinearParamScheduler(
self.optimizer,
param_name='lr',
@ -626,7 +663,9 @@ class TestParameterScheduler(TestCase):
(1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
]
single_targets = single_targets1 + single_targets2
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler2 = CosineAnnealingParamScheduler(
self.optimizer,
param_name='lr',
@ -642,7 +681,9 @@ class TestParameterScheduler(TestCase):
epochs = 10
single_targets = [0.025, 0.03125, 0.0375, 0.004375
] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler1 = LinearParamScheduler(
self.optimizer,
param_name='lr',
@ -668,7 +709,9 @@ class TestParameterScheduler(TestCase):
]
single_targets = single_targets1 + [single_targets1[-1]
] * 5 + single_targets2
-targets = [single_targets, [x * epochs for x in single_targets]]
+targets = [
+single_targets, [x * self.layer2_mult for x in single_targets]
+]
scheduler2 = CosineAnnealingParamScheduler(
self.optimizer,
param_name='lr',