Mirror of https://github.com/open-mmlab/mmengine.git (synced 2025-06-03)
[Enhance] Support scheduling betas with MomentumScheduler. (#346)
* [Enhance] Support scheduling betas with MomentumScheduler.
* enhance ut
* test adam betas
* enhance ut
* enhance ut
This commit is contained in:
parent 2853045e96
commit a3d2916790
@@ -220,6 +220,17 @@ class OptimWrapper:
         """
         return self.optimizer.param_groups
 
+    @property
+    def defaults(self) -> dict:
+        """A wrapper of ``Optimizer.defaults``.
+
+        Make OptimWrapper compatible with :class:`_ParamScheduler`.
+
+        Returns:
+            dict: the ``defaults`` of :attr:`optimizer`.
+        """
+        return self.optimizer.defaults
+
     def get_lr(self) -> Dict[str, List[float]]:
         """Get the learning rate of the optimizer.
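The `defaults` property above exists so that `_ParamScheduler` subclasses (notably the momentum mixin below) can probe `optimizer.defaults` even when the optimizer is wrapped. A minimal sketch of the forwarding pattern; `ToyWrapper` is illustrative and not part of this commit:

import torch.nn as nn
import torch.optim as optim


class ToyWrapper:
    """Illustrative stand-in for OptimWrapper that forwards ``defaults``."""

    def __init__(self, optimizer):
        self.optimizer = optimizer

    @property
    def defaults(self) -> dict:
        # same forwarding as the property added in the hunk above
        return self.optimizer.defaults


wrapper = ToyWrapper(
    optim.SGD(nn.Linear(2, 2).parameters(), lr=0.1, momentum=0.9))
assert 'momentum' in wrapper.defaults  # what the momentum mixin checks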
@@ -8,10 +8,37 @@ from .param_scheduler import (ConstantParamScheduler,
 
 
 class MomentumSchedulerMixin:
-    """A mixin class for momentum schedulers."""
+    """A mixin class for momentum schedulers.
+
+    It can schedule the momentum in SGD and the beta_0 in Adam series.
+    """
 
     def __init__(self, optimizer, *args, **kwargs):
-        super().__init__(optimizer, 'momentum', *args, **kwargs)
+        self.use_betas = False
+        if 'momentum' in optimizer.defaults:
+            param_name = 'momentum'
+        elif 'betas' in optimizer.defaults:
+            # for Adam series optimizer, the momentum is beta_0
+            self.use_betas = True
+            param_name = 'momentum'
+            for group in optimizer.param_groups:
+                # set a reference momentum in the param groups for scheduling
+                group[param_name] = group['betas'][0]
+        else:
+            raise ValueError(
+                'optimizer must support momentum when using momentum scheduler'
+            )
+        super().__init__(optimizer, param_name, *args, **kwargs)
+
+    def step(self):
+        """Adjusts the parameter value of each parameter group based on the
+        specified schedule."""
+        super().step()
+        if self.use_betas:
+            for group in self.optimizer.param_groups:
+                _, beta_1 = group['betas']
+                # update the betas with the calculated value
+                group['betas'] = (group['momentum'], beta_1)
 
 
 @PARAM_SCHEDULERS.register_module()
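With the mixin in place, any momentum scheduler can drive Adam's beta_0. A rough usage sketch; the `from mmengine.optim import StepMomentum` import path is an assumption about the package's public exports:

import torch.nn as nn
import torch.optim as optim
from mmengine.optim import StepMomentum  # assumed export

model = nn.Linear(2, 2)
optimizer = optim.Adam(model.parameters(), lr=0.01, betas=(0.9, 0.999))

# __init__ finds 'betas' in optimizer.defaults, sets use_betas=True and
# mirrors beta_0 into a 'momentum' key of each param group.
scheduler = StepMomentum(optimizer, step_size=3, gamma=0.1)

for epoch in range(6):
    # ... one training epoch ...
    scheduler.step()  # schedules 'momentum', then writes it back into betas

print(optimizer.param_groups[0]['betas'])  # beta_0 decayed, beta_1 untouched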
@@ -1,4 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+# ------------------------------------------------------------------------
+# Modified from https://github.com/pytorch/pytorch
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
+
 import math
 import warnings
 import weakref
@@ -33,8 +33,17 @@ class TestLRScheduler(TestCase):
         tearDown() -> cleanUp()
         """
         self.model = ToyModel()
-        self.optimizer = optim.SGD(
-            self.model.parameters(), lr=0.05, momentum=0.01, weight_decay=5e-4)
+        lr = 0.05
+        self.layer2_mult = 10
+        self.optimizer = optim.SGD([{
+            'params': self.model.conv1.parameters()
+        }, {
+            'params': self.model.conv2.parameters(),
+            'lr': lr * self.layer2_mult,
+        }],
+                                   lr=lr,
+                                   momentum=0.01,
+                                   weight_decay=5e-4)
 
     def test_base_scheduler_step(self):
         with self.assertRaises(NotImplementedError):
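The same setUp change is applied to all three test classes: the single parameter group becomes two groups, with the second group's value scaled by `self.layer2_mult = 10`, so every test now checks per-group scheduling. For multiplicative schedulers the expected list for the second group is simply the first list scaled, as this self-contained sketch (using torch's own `StepLR` for brevity) illustrates:

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

model = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
optimizer = optim.SGD([
    {'params': model[0].parameters()},
    {'params': model[1].parameters(), 'lr': 0.5},  # 10x the base lr
], lr=0.05)

scheduler = StepLR(optimizer, step_size=3, gamma=0.1)
for _ in range(4):
    scheduler.step()
lrs = [g['lr'] for g in optimizer.param_groups]
assert abs(lrs[1] - 10 * lrs[0]) < 1e-12  # ratio preserved by the schedule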
@@ -69,20 +78,18 @@ class TestLRScheduler(TestCase):
 
         results = []
         for epoch in range(5):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
+            results.append(self.optimizer.param_groups[0]['lr'])
             # The order should be
             # train_epoch() -> save_checkpoint() -> scheduler.step().
             # Break at here to simulate the checkpoint is saved before
             # the scheduler.step().
             if epoch == 4:
                 break
             scheduler.step()
 
         scheduler2 = ExponentialLR(self.optimizer, gamma=0.9, last_step=4)
         for epoch in range(6):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
+            results.append(self.optimizer.param_groups[0]['lr'])
             scheduler2.step()
 
         for epoch in range(epochs):
             assert_allclose(
@@ -121,7 +128,10 @@ class TestLRScheduler(TestCase):
 
     def test_get_last_value(self):
         epochs = 10
-        targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepLR(self.optimizer, 3, gamma=0.1)
         for epoch in range(epochs):
             result = scheduler.get_last_value()
@@ -171,7 +181,9 @@ class TestLRScheduler(TestCase):
         single_targets = [0.05] * begin + [x * 0.05
                                            for x in interpolation] + [0.05] * (
                                                epochs - iters - begin)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearLR(
             self.optimizer,
             start_factor=start_factor,
|
|||||||
epochs = 10
|
epochs = 10
|
||||||
single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
|
single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
|
||||||
] * 3
|
] * 3
|
||||||
targets = [single_targets, [x * epochs for x in single_targets]]
|
targets = [
|
||||||
|
single_targets, [x * self.layer2_mult for x in single_targets]
|
||||||
|
]
|
||||||
scheduler = StepLR(
|
scheduler = StepLR(
|
||||||
self.optimizer, gamma=0.1, step_size=3, verbose=True)
|
self.optimizer, gamma=0.1, step_size=3, verbose=True)
|
||||||
self._test_scheduler_value(scheduler, targets, epochs)
|
self._test_scheduler_value(scheduler, targets, epochs)
|
||||||
@ -220,7 +234,9 @@ class TestLRScheduler(TestCase):
|
|||||||
epochs = 10
|
epochs = 10
|
||||||
single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
|
single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
|
||||||
] * 3
|
] * 3
|
||||||
targets = [single_targets, [x * epochs for x in single_targets]]
|
targets = [
|
||||||
|
single_targets, [x * self.layer2_mult for x in single_targets]
|
||||||
|
]
|
||||||
scheduler = MultiStepLR(
|
scheduler = MultiStepLR(
|
||||||
self.optimizer, gamma=0.1, milestones=[2, 5, 9])
|
self.optimizer, gamma=0.1, milestones=[2, 5, 9])
|
||||||
self._test_scheduler_value(scheduler, targets, epochs)
|
self._test_scheduler_value(scheduler, targets, epochs)
|
||||||
@ -234,7 +250,9 @@ class TestLRScheduler(TestCase):
|
|||||||
# lr = 0.005 if 5 <= epoch
|
# lr = 0.005 if 5 <= epoch
|
||||||
epochs = 10
|
epochs = 10
|
||||||
single_targets = [0.025] * 4 + [0.05] * 6
|
single_targets = [0.025] * 4 + [0.05] * 6
|
||||||
targets = [single_targets, [x * epochs for x in single_targets]]
|
targets = [
|
||||||
|
single_targets, [x * self.layer2_mult for x in single_targets]
|
||||||
|
]
|
||||||
scheduler = ConstantLR(self.optimizer, factor=1.0 / 2, end=5)
|
scheduler = ConstantLR(self.optimizer, factor=1.0 / 2, end=5)
|
||||||
self._test_scheduler_value(scheduler, targets, epochs)
|
self._test_scheduler_value(scheduler, targets, epochs)
|
||||||
|
|
||||||
@ -260,7 +278,9 @@ class TestLRScheduler(TestCase):
|
|||||||
]
|
]
|
||||||
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
|
single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
|
||||||
epochs - iters)
|
epochs - iters)
|
||||||
targets = [single_targets, [x * epochs for x in single_targets]]
|
targets = [
|
||||||
|
single_targets, [x * self.layer2_mult for x in single_targets]
|
||||||
|
]
|
||||||
scheduler = LinearLR(
|
scheduler = LinearLR(
|
||||||
self.optimizer, start_factor=start_factor, end=iters + 1)
|
self.optimizer, start_factor=start_factor, end=iters + 1)
|
||||||
self._test_scheduler_value(scheduler, targets, epochs)
|
self._test_scheduler_value(scheduler, targets, epochs)
|
||||||
@ -268,7 +288,9 @@ class TestLRScheduler(TestCase):
|
|||||||
def test_exp_scheduler(self):
|
def test_exp_scheduler(self):
|
||||||
epochs = 10
|
epochs = 10
|
||||||
single_targets = [0.05 * (0.9**x) for x in range(epochs)]
|
single_targets = [0.05 * (0.9**x) for x in range(epochs)]
|
||||||
targets = [single_targets, [x * epochs for x in single_targets]]
|
targets = [
|
||||||
|
single_targets, [x * self.layer2_mult for x in single_targets]
|
||||||
|
]
|
||||||
scheduler = ExponentialLR(self.optimizer, gamma=0.9)
|
scheduler = ExponentialLR(self.optimizer, gamma=0.9)
|
||||||
self._test_scheduler_value(scheduler, targets, epochs)
|
self._test_scheduler_value(scheduler, targets, epochs)
|
||||||
|
|
||||||
@@ -280,7 +302,9 @@ class TestLRScheduler(TestCase):
             eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
             for x in range(epochs)
         ]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = CosineAnnealingLR(self.optimizer, T_max=t, eta_min=eta_min)
         self._test_scheduler_value(scheduler, targets, epochs)
 
@@ -289,12 +313,17 @@ class TestLRScheduler(TestCase):
         power = 0.9
         min_lr = 0.001
         iters = 4
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyLR(
             self.optimizer, power=power, eta_min=min_lr, end=iters + 1)
         self._test_scheduler_value(scheduler, targets, epochs=10)
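The poly tests are the one place where the layer-2 expectations cannot be produced by scaling the layer-1 list: `eta_min`/`min_lr` is an additive floor, so only the base value is multiplied and the target list is rebuilt. A standalone sketch of the arithmetic used above:

# Poly schedule: decay from `base` toward the additive floor `min_lr` over
# `iters` steps, then hold. Scaling `base` does not scale the whole list.
def poly_targets(base, min_lr, power, iters, epochs):
    decayed = [
        min_lr + (base - min_lr) * (1 - i / iters)**power
        for i in range(iters)
    ]
    return decayed + [min_lr] * (epochs - iters)


layer1 = poly_targets(0.05, 0.001, 0.9, 4, 10)
layer2 = poly_targets(0.5, 0.001, 0.9, 4, 10)  # base * 10, same floor
assert layer2 != [10 * x for x in layer1]  # floors match, so no common ratio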
@@ -365,8 +394,7 @@ class TestLRScheduler(TestCase):
         epoch_length = 7
         single_targets = [0.05] * 2 * epoch_length + [0.005] * 2 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = StepLR.build_iter_from_epoch(
             self.optimizer, gamma=0.1, step_size=2, epoch_length=epoch_length)
@@ -385,8 +413,7 @@ class TestLRScheduler(TestCase):
             0.0005
         ] * 4 * epoch_length + [0.00005] * 3 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = MultiStepLR.build_iter_from_epoch(
             self.optimizer,
@@ -403,8 +430,7 @@ class TestLRScheduler(TestCase):
         single_targets = [0.025] * (5 * epoch_length -
                                     1) + [0.05] * (5 * epoch_length + 1)
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ConstantLR.build_iter_from_epoch(
             self.optimizer, factor=1.0 / 2, end=5, epoch_length=epoch_length)
@@ -422,7 +448,9 @@ class TestLRScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs * epoch_length - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearLR.build_iter_from_epoch(
             self.optimizer,
             start_factor=start_factor,
@@ -438,8 +466,7 @@ class TestLRScheduler(TestCase):
             0.05 * (0.9**x) for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ExponentialLR.build_iter_from_epoch(
             self.optimizer, gamma=0.9, epoch_length=epoch_length)
@@ -456,8 +483,7 @@ class TestLRScheduler(TestCase):
             for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = CosineAnnealingLR.build_iter_from_epoch(
             self.optimizer,
@@ -474,15 +500,17 @@ class TestLRScheduler(TestCase):
         epoch_length = 11
 
         iters = end * epoch_length - 1
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
-        ]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyLR.build_iter_from_epoch(
             self.optimizer,
             power=power,
@@ -496,7 +524,9 @@ class TestLRScheduler(TestCase):
         epochs = 12
         single_targets = [0.025, 0.03125, 0.0375, 0.04375
                           ] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearLR(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepLR(
@@ -515,7 +545,9 @@ class TestLRScheduler(TestCase):
             (1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
         ]
         single_targets = single_targets1 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingLR(
             self.optimizer, T_max=5, eta_min=eta_min, begin=5, end=10)
 
@@ -526,7 +558,9 @@ class TestLRScheduler(TestCase):
         epochs = 10
         single_targets = [0.025, 0.03125, 0.0375, 0.004375
                           ] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearLR(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepLR(
@@ -547,7 +581,9 @@ class TestLRScheduler(TestCase):
         ]
         single_targets = single_targets1 + [single_targets1[-1]
                                             ] * 5 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingLR(
             self.optimizer, T_max=5, eta_min=eta_min, begin=10, end=15)
 
@@ -34,12 +34,38 @@ class TestMomentumScheduler(TestCase):
         tearDown() -> cleanUp()
         """
         self.model = ToyModel()
-        self.optimizer = optim.SGD(
-            self.model.parameters(), lr=0.01, momentum=0.05, weight_decay=5e-4)
+        momentum = 0.05
+        self.layer2_mult = 10
+        self.optimizer = optim.SGD([{
+            'params': self.model.conv1.parameters()
+        }, {
+            'params': self.model.conv2.parameters(),
+            'momentum': momentum * self.layer2_mult
+        }],
+                                   lr=0.01,
+                                   momentum=momentum,
+                                   weight_decay=5e-4)
+        self.optimizer_with_betas = optim.Adam(
+            [{
+                'params': self.model.conv1.parameters()
+            }, {
+                'params': self.model.conv2.parameters(),
+                'betas': (momentum * self.layer2_mult, 0.999)
+            }],
+            lr=0.01,
+            betas=(momentum, 0.999),
+            weight_decay=5e-4)
 
     def test_invalid_optimizer(self):
-        with self.assertRaisesRegex(TypeError, 'should be an Optimizer'):
-            StepMomentum('invalid_optimizer', step_size=1)
+        with self.assertRaisesRegex(
+                ValueError,
+                'optimizer must support momentum when using momentum scheduler'
+        ):
+            optimizer = optim.ASGD(
+                self.model.parameters(),
+                lr=0.01,
+            )
+            StepMomentum(optimizer, step_size=1)
 
     def test_overwrite_optimzer_step(self):
         # raise warning if the counter in optimizer.step() is overwritten
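This setUp prepares one optimizer per branch of `MomentumSchedulerMixin.__init__`: SGD exposes `momentum`, Adam exposes `betas`, and the invalid-optimizer test switches to ASGD, whose defaults contain neither, triggering the new `ValueError`. A quick check of that assumption:

import torch.nn as nn
import torch.optim as optim


def fresh_params():
    return nn.Linear(2, 2).parameters()


print('momentum' in optim.SGD(fresh_params(), lr=0.01, momentum=0.05).defaults)
# True -> scheduled directly
print('betas' in optim.Adam(fresh_params(), lr=0.01).defaults)
# True -> beta_0 is scheduled via the mirrored 'momentum' key
asgd = optim.ASGD(fresh_params(), lr=0.01)
print('momentum' in asgd.defaults, 'betas' in asgd.defaults)
# False False -> the mixin raises ValueError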
@@ -66,21 +92,19 @@ class TestMomentumScheduler(TestCase):
 
         results = []
         for epoch in range(5):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['momentum'])
+            results.append(self.optimizer.param_groups[0]['momentum'])
             # The order should be
             # train_epoch() -> save_checkpoint() -> scheduler.step().
             # Break at here to simulate the checkpoint is saved before
             # the scheduler.step().
             if epoch == 4:
                 break
             scheduler.step()
 
         scheduler2 = ExponentialMomentum(
             self.optimizer, gamma=0.9, last_step=4)
         for epoch in range(6):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['momentum'])
+            results.append(self.optimizer.param_groups[0]['momentum'])
             scheduler2.step()
 
         for epoch in range(epochs):
             assert_allclose(
@@ -119,7 +143,10 @@ class TestMomentumScheduler(TestCase):
 
     def test_get_last_value(self):
        epochs = 10
-        targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+        targets = [
+            single_targets, [t * self.layer2_mult for t in single_targets]
+        ]
         scheduler = StepMomentum(self.optimizer, 3, gamma=0.1)
         for epoch in range(epochs):
             result = scheduler.get_last_value()
@@ -170,15 +197,18 @@ class TestMomentumScheduler(TestCase):
         single_targets = [0.05] * begin + [x * 0.05
                                            for x in interpolation] + [0.05] * (
                                                epochs - iters - begin)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearMomentum(
             self.optimizer,
             start_factor=start_factor,
             begin=begin,
             end=begin + iters + 1)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
 
     def _test_scheduler_value(self,
+                              optimizer,
                               schedulers,
                               targets,
                               epochs=10,
@@ -186,8 +216,7 @@ class TestMomentumScheduler(TestCase):
         if isinstance(schedulers, _ParamScheduler):
             schedulers = [schedulers]
         for epoch in range(epochs):
-            for param_group, target in zip(self.optimizer.param_groups,
-                                           targets):
+            for param_group, target in zip(optimizer.param_groups, targets):
                 assert_allclose(
                     target[epoch],
                     param_group[param_name],
@@ -196,6 +225,15 @@ class TestMomentumScheduler(TestCase):
                         param_group[param_name]),
                     atol=1e-5,
                     rtol=0)
+                if 'betas' in optimizer.defaults:
+                    assert_allclose(
+                        target[epoch],
+                        param_group['betas'][0],
+                        msg='{} is wrong in epoch {}: expected {}, got {}'.
+                        format('betas_0', epoch, target[epoch],
+                               param_group['betas'][0]),
+                        atol=1e-5,
+                        rtol=0)
             [scheduler.step() for scheduler in schedulers]
 
     def test_step_scheduler(self):
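The extra assertion pins down the invariant the mixin maintains for Adam-style optimizers: after every `step()`, `betas[0]` in each param group must equal the scheduled `momentum` entry. Sketched as a standalone check (import path assumed as before):

import torch.nn as nn
import torch.optim as optim
from mmengine.optim import StepMomentum  # assumed export

optimizer = optim.Adam(
    nn.Linear(2, 2).parameters(), lr=0.01, betas=(0.9, 0.999))
scheduler = StepMomentum(optimizer, step_size=1, gamma=0.5)
for _ in range(3):
    scheduler.step()
    for group in optimizer.param_groups:
        # invariant kept by MomentumSchedulerMixin.step()
        assert abs(group['betas'][0] - group['momentum']) < 1e-5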
@@ -206,10 +244,17 @@ class TestMomentumScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepMomentum(
             self.optimizer, gamma=0.1, step_size=3, verbose=True)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = StepMomentum(
+            self.optimizer_with_betas, gamma=0.1, step_size=3, verbose=True)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_multi_step_scheduler(self):
         # momentum = 0.05 if epoch < 2
@@ -219,10 +264,17 @@ class TestMomentumScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = MultiStepMomentum(
             self.optimizer, gamma=0.1, milestones=[2, 5, 9])
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = MultiStepMomentum(
+            self.optimizer_with_betas, gamma=0.1, milestones=[2, 5, 9])
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_constant_scheduler(self):
         # factor should between 0~1
@@ -233,9 +285,16 @@ class TestMomentumScheduler(TestCase):
         # momentum = 0.005 if 5 <= epoch
         epochs = 10
         single_targets = [0.025] * 4 + [0.05] * 6
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ConstantMomentum(self.optimizer, factor=1.0 / 2, end=5)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = ConstantMomentum(
+            self.optimizer_with_betas, factor=1.0 / 2, end=5)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_linear_scheduler(self):
         with self.assertRaises(ValueError):
@@ -259,17 +318,32 @@ class TestMomentumScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearMomentum(
             self.optimizer, start_factor=start_factor, end=iters + 1)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = LinearMomentum(
+            self.optimizer_with_betas,
+            start_factor=start_factor,
+            end=iters + 1)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_exp_scheduler(self):
         epochs = 10
         single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ExponentialMomentum(self.optimizer, gamma=0.9)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = ExponentialMomentum(self.optimizer_with_betas, gamma=0.9)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_cos_anneal_scheduler(self):
         epochs = 12
@@ -279,25 +353,46 @@ class TestMomentumScheduler(TestCase):
             eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
             for x in range(epochs)
         ]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = CosineAnnealingMomentum(
             self.optimizer, T_max=t, eta_min=eta_min)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = CosineAnnealingMomentum(
+            self.optimizer_with_betas, T_max=t, eta_min=eta_min)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_poly_scheduler(self):
         epochs = 10
         power = 0.9
         min_lr = 0.001
         iters = 4
-        single_targets = [
+        layer1_targets = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        layer2_targets = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [layer1_targets, layer2_targets]
         scheduler = PolyMomentum(
             self.optimizer, power=power, eta_min=min_lr, end=iters + 1)
-        self._test_scheduler_value(scheduler, targets, epochs=10)
+        self._test_scheduler_value(
+            self.optimizer, scheduler, targets, epochs=10)
+
+        scheduler = PolyMomentum(
+            self.optimizer_with_betas,
+            power=power,
+            eta_min=min_lr,
+            end=iters + 1)
+        self._test_scheduler_value(
+            self.optimizer_with_betas, scheduler, targets, epochs=10)
 
     def _check_scheduler_state_dict(self, construct, construct2, epochs=10):
         scheduler = construct()
@@ -359,12 +454,15 @@ class TestMomentumScheduler(TestCase):
         epochs = 12
         single_targets = [0.025, 0.03125, 0.0375, 0.04375
                           ] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearMomentum(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepMomentum(
             self.optimizer, gamma=0.1, milestones=[3, 6], begin=5, end=12)
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
 
     def test_multi_scheduler_without_overlap_exp_cosine(self):
         # use Exp in the first 5 epochs and then use Cosine
@@ -379,23 +477,29 @@ class TestMomentumScheduler(TestCase):
             (1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
         ]
         single_targets = single_targets1 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingMomentum(
             self.optimizer, T_max=5, eta_min=eta_min, begin=5, end=10)
 
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
 
     def test_multi_scheduler_with_overlap(self):
         # use Linear at first 5 epochs together with MultiStep
         epochs = 10
         single_targets = [0.025, 0.03125, 0.0375, 0.004375
                           ] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearMomentum(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepMomentum(
             self.optimizer, gamma=0.1, milestones=[3, 6, 9])
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
 
     def test_multi_scheduler_with_gap(self):
         # use Exp in the first 5 epochs and the last 5 epochs use Cosine
@@ -412,8 +516,11 @@ class TestMomentumScheduler(TestCase):
         ]
         single_targets = single_targets1 + [single_targets1[-1]
                                             ] * 5 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingMomentum(
             self.optimizer, T_max=5, eta_min=eta_min, begin=10, end=15)
 
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
@@ -39,8 +39,22 @@ class TestParameterScheduler(TestCase):
         tearDown() -> cleanUp()
         """
         self.model = ToyModel()
+        self.layer2_mult = 10
+        lr = 0.05
+        momentum = 0.01
+        weight_decay = 5e-4
         self.optimizer = optim.SGD(
-            self.model.parameters(), lr=0.05, momentum=0.01, weight_decay=5e-4)
+            [{
+                'params': self.model.conv1.parameters()
+            }, {
+                'params': self.model.conv2.parameters(),
+                'lr': lr * self.layer2_mult,
+                'momentum': momentum * self.layer2_mult,
+                'weight_decay': weight_decay * self.layer2_mult
+            }],
+            lr=lr,
+            momentum=momentum,
+            weight_decay=weight_decay)
 
     def test_base_scheduler_step(self):
         with self.assertRaises(NotImplementedError):
@@ -83,21 +97,19 @@ class TestParameterScheduler(TestCase):
 
         results = []
         for epoch in range(5):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
+            results.append(self.optimizer.param_groups[0]['lr'])
             # The order should be
             # train_epoch() -> save_checkpoint() -> scheduler.step().
             # Break at here to simulate the checkpoint is saved before
             # the scheduler.step().
             if epoch == 4:
                 break
             scheduler.step()
 
         scheduler2 = ExponentialParamScheduler(
             self.optimizer, param_name='lr', gamma=0.9, last_step=4)
         for epoch in range(6):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
+            results.append(self.optimizer.param_groups[0]['lr'])
             scheduler2.step()
 
         for epoch in range(epochs):
             assert_allclose(
@@ -141,7 +153,10 @@ class TestParameterScheduler(TestCase):
 
     def test_get_last_value(self):
         epochs = 10
-        targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepParamScheduler(
             self.optimizer, param_name='lr', step_size=3, gamma=0.1)
         for epoch in range(epochs):
@@ -199,7 +214,9 @@ class TestParameterScheduler(TestCase):
         single_targets = [0.05] * begin + [x * 0.05
                                            for x in interpolation] + [0.05] * (
                                                epochs - iters - begin)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -241,7 +258,9 @@ class TestParameterScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -254,7 +273,9 @@ class TestParameterScheduler(TestCase):
         # momentum = 0.001 if 2 <= epoch < 4
         epochs = 4
         single_targets = [0.01] * 2 + [0.001] * 2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepParamScheduler(
             self.optimizer, param_name='momentum', gamma=0.1, step_size=2)
         self._test_scheduler_value(
@@ -268,7 +289,9 @@ class TestParameterScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = MultiStepParamScheduler(
             self.optimizer, param_name='lr', gamma=0.1, milestones=[2, 5, 9])
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -282,7 +305,9 @@ class TestParameterScheduler(TestCase):
         # lr = 0.005 if 5 <= epoch
         epochs = 10
         single_targets = [0.025] * 4 + [0.05] * 6
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ConstantParamScheduler(
             self.optimizer, param_name='lr', factor=1.0 / 2, end=5)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -313,7 +338,9 @@ class TestParameterScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -324,7 +351,9 @@ class TestParameterScheduler(TestCase):
     def test_exp_scheduler(self):
         epochs = 10
         single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ExponentialParamScheduler(
             self.optimizer, param_name='lr', gamma=0.9)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -337,7 +366,9 @@ class TestParameterScheduler(TestCase):
             eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
             for x in range(epochs)
         ]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = CosineAnnealingParamScheduler(
             self.optimizer, param_name='lr', T_max=t, eta_min=eta_min)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -347,12 +378,17 @@ class TestParameterScheduler(TestCase):
         power = 0.9
         min_lr = 0.001
         iters = 4
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -451,8 +487,7 @@ class TestParameterScheduler(TestCase):
         epoch_length = 7
         single_targets = [0.01] * 2 * epoch_length + [0.001] * 2 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = StepParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -475,8 +510,7 @@ class TestParameterScheduler(TestCase):
             0.0005
         ] * 4 * epoch_length + [0.00005] * 3 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = MultiStepParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -494,8 +528,7 @@ class TestParameterScheduler(TestCase):
         single_targets = [0.025] * (5 * epoch_length -
                                     1) + [0.05] * (5 * epoch_length + 1)
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ConstantParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -517,7 +550,9 @@ class TestParameterScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs * epoch_length - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearParamScheduler.build_iter_from_epoch(
             self.optimizer,
             param_name='lr',
@@ -534,8 +569,7 @@ class TestParameterScheduler(TestCase):
             0.05 * (0.9**x) for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ExponentialParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -555,8 +589,7 @@ class TestParameterScheduler(TestCase):
             for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = CosineAnnealingParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -574,15 +607,17 @@ class TestParameterScheduler(TestCase):
         epoch_length = 11
 
         iters = end * epoch_length - 1
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
-        ]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyParamScheduler.build_iter_from_epoch(
             self.optimizer,
             param_name='lr',
@@ -597,7 +632,9 @@ class TestParameterScheduler(TestCase):
         epochs = 12
         single_targets = [0.025, 0.03125, 0.0375, 0.04375
                           ] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -626,7 +663,9 @@ class TestParameterScheduler(TestCase):
             (1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
         ]
         single_targets = single_targets1 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -642,7 +681,9 @@ class TestParameterScheduler(TestCase):
         epochs = 10
         single_targets = [0.025, 0.03125, 0.0375, 0.004375
                           ] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -668,7 +709,9 @@ class TestParameterScheduler(TestCase):
         ]
         single_targets = single_targets1 + [single_targets1[-1]
                                             ] * 5 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingParamScheduler(
             self.optimizer,
             param_name='lr',