[Enhance] Support scheduling betas with MomentumScheduler. (#346)
* [Enhance] Support scheduling betas with MomentumScheduler. * enhance ut * test adam betas * enhance ut * enhance ut
parent 2853045e96
commit a3d2916790
@@ -220,6 +220,17 @@ class OptimWrapper:
         """
         return self.optimizer.param_groups
 
+    @property
+    def defaults(self) -> dict:
+        """A wrapper of ``Optimizer.defaults``.
+
+        Make OptimWrapper compatible with :class:`_ParamScheduler`.
+
+        Returns:
+            dict: the ``defaults`` of :attr:`optimizer`.
+        """
+        return self.optimizer.defaults
+
     def get_lr(self) -> Dict[str, List[float]]:
         """Get the learning rate of the optimizer.
 
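For context, parameter schedulers probe `optimizer.defaults` to discover which parameters (e.g. `momentum` or `betas`) the optimizer supports, so the wrapper has to forward it just like `param_groups`. A minimal sketch of that contract (the `TinyWrapper` name and toy model are illustrative assumptions, not part of this PR):

import torch.nn as nn
import torch.optim as optim


class TinyWrapper:
    # Illustrative stand-in for OptimWrapper.

    def __init__(self, optimizer):
        self.optimizer = optimizer

    @property
    def param_groups(self):
        return self.optimizer.param_groups

    @property
    def defaults(self):
        # forward the wrapped optimizer's defaults, as the property above does
        return self.optimizer.defaults


model = nn.Linear(2, 2)
wrapper = TinyWrapper(optim.SGD(model.parameters(), lr=0.1, momentum=0.9))
assert 'momentum' in wrapper.defaults  # the check MomentumSchedulerMixin performs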
@@ -8,10 +8,37 @@ from .param_scheduler import (ConstantParamScheduler,
 
 
 class MomentumSchedulerMixin:
-    """A mixin class for momentum schedulers."""
+    """A mixin class for momentum schedulers.
+
+    It can schedule the momentum in SGD and the beta_0 in Adam series.
+    """
 
     def __init__(self, optimizer, *args, **kwargs):
-        super().__init__(optimizer, 'momentum', *args, **kwargs)
+        self.use_betas = False
+        if 'momentum' in optimizer.defaults:
+            param_name = 'momentum'
+        elif 'betas' in optimizer.defaults:
+            # for Adam series optimizer, the momentum is beta_0
+            self.use_betas = True
+            param_name = 'momentum'
+            for group in optimizer.param_groups:
+                # set a reference momentum in the param groups for scheduling
+                group[param_name] = group['betas'][0]
+        else:
+            raise ValueError(
+                'optimizer must support momentum when using momentum scheduler'
+            )
+        super().__init__(optimizer, param_name, *args, **kwargs)
+
+    def step(self):
+        """Adjusts the parameter value of each parameter group based on the
+        specified schedule."""
+        super().step()
+        if self.use_betas:
+            for group in self.optimizer.param_groups:
+                _, beta_1 = group['betas']
+                # update the betas with the calculated value
+                group['betas'] = (group['momentum'], beta_1)
 
 
 @PARAM_SCHEDULERS.register_module()
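A minimal usage sketch of the new behavior (the toy model is an assumption, and the import path is inferred from this PR's package layout; `ExponentialMomentum` is one of the schedulers exercised by the tests below): with an Adam optimizer, the mixin seeds a synthetic `momentum` entry in each param group from `betas[0]`, schedules it, and mirrors it back into `betas` on every `step()`, leaving `beta_1` untouched:

import torch.nn as nn
import torch.optim as optim

from mmengine.optim.scheduler import ExponentialMomentum

model = nn.Linear(2, 2)
adam = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999))

scheduler = ExponentialMomentum(adam, gamma=0.9)
for _ in range(3):
    scheduler.step()

group = adam.param_groups[0]
# beta_0 now tracks the scheduled 'momentum' value; beta_1 stays 0.999.
assert group['betas'] == (group['momentum'], 0.999)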
@@ -1,4 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+# ------------------------------------------------------------------------
+# Modified from https://github.com/pytorch/pytorch
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# ------------------------------------------------------------------------
+
 import math
 import warnings
 import weakref
@@ -33,8 +33,17 @@ class TestLRScheduler(TestCase):
         tearDown() -> cleanUp()
         """
         self.model = ToyModel()
-        self.optimizer = optim.SGD(
-            self.model.parameters(), lr=0.05, momentum=0.01, weight_decay=5e-4)
+        lr = 0.05
+        self.layer2_mult = 10
+        self.optimizer = optim.SGD([{
+            'params': self.model.conv1.parameters()
+        }, {
+            'params': self.model.conv2.parameters(),
+            'lr': lr * self.layer2_mult,
+        }],
+                                   lr=lr,
+                                   momentum=0.01,
+                                   weight_decay=5e-4)
 
     def test_base_scheduler_step(self):
         with self.assertRaises(NotImplementedError):
@@ -69,20 +78,18 @@ class TestLRScheduler(TestCase):
 
         results = []
         for epoch in range(5):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
-            # The order should be
-            # train_epoch() -> save_checkpoint() -> scheduler.step().
-            # Break at here to simulate the checkpoint is saved before
-            # the scheduler.step().
-            if epoch == 4:
-                break
-            scheduler.step()
+            results.append(self.optimizer.param_groups[0]['lr'])
+            # The order should be
+            # train_epoch() -> save_checkpoint() -> scheduler.step().
+            # Break at here to simulate the checkpoint is saved before
+            # the scheduler.step().
+            if epoch == 4:
+                break
+            scheduler.step()
         scheduler2 = ExponentialLR(self.optimizer, gamma=0.9, last_step=4)
         for epoch in range(6):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
-            scheduler2.step()
+            results.append(self.optimizer.param_groups[0]['lr'])
+            scheduler2.step()
 
         for epoch in range(epochs):
             assert_allclose(
@@ -121,7 +128,10 @@ class TestLRScheduler(TestCase):
 
     def test_get_last_value(self):
        epochs = 10
-        targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepLR(self.optimizer, 3, gamma=0.1)
         for epoch in range(epochs):
             result = scheduler.get_last_value()
@@ -171,7 +181,9 @@ class TestLRScheduler(TestCase):
         single_targets = [0.05] * begin + [x * 0.05
                                            for x in interpolation] + [0.05] * (
                                                epochs - iters - begin)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearLR(
             self.optimizer,
             start_factor=start_factor,
@@ -207,7 +219,9 @@ class TestLRScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepLR(
             self.optimizer, gamma=0.1, step_size=3, verbose=True)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -220,7 +234,9 @@ class TestLRScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = MultiStepLR(
             self.optimizer, gamma=0.1, milestones=[2, 5, 9])
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -234,7 +250,9 @@ class TestLRScheduler(TestCase):
         # lr = 0.005 if 5 <= epoch
         epochs = 10
         single_targets = [0.025] * 4 + [0.05] * 6
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ConstantLR(self.optimizer, factor=1.0 / 2, end=5)
         self._test_scheduler_value(scheduler, targets, epochs)
 
@@ -260,7 +278,9 @@ class TestLRScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearLR(
             self.optimizer, start_factor=start_factor, end=iters + 1)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -268,7 +288,9 @@ class TestLRScheduler(TestCase):
     def test_exp_scheduler(self):
         epochs = 10
         single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ExponentialLR(self.optimizer, gamma=0.9)
         self._test_scheduler_value(scheduler, targets, epochs)
 
@@ -280,7 +302,9 @@ class TestLRScheduler(TestCase):
             eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
             for x in range(epochs)
         ]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = CosineAnnealingLR(self.optimizer, T_max=t, eta_min=eta_min)
         self._test_scheduler_value(scheduler, targets, epochs)
 
@@ -289,12 +313,17 @@ class TestLRScheduler(TestCase):
         power = 0.9
         min_lr = 0.001
         iters = 4
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyLR(
             self.optimizer, power=power, eta_min=min_lr, end=iters + 1)
         self._test_scheduler_value(scheduler, targets, epochs=10)
@@ -365,8 +394,7 @@ class TestLRScheduler(TestCase):
         epoch_length = 7
         single_targets = [0.05] * 2 * epoch_length + [0.005] * 2 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = StepLR.build_iter_from_epoch(
             self.optimizer, gamma=0.1, step_size=2, epoch_length=epoch_length)
@@ -385,8 +413,7 @@ class TestLRScheduler(TestCase):
             0.0005
         ] * 4 * epoch_length + [0.00005] * 3 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = MultiStepLR.build_iter_from_epoch(
             self.optimizer,
@@ -403,8 +430,7 @@ class TestLRScheduler(TestCase):
         single_targets = [0.025] * (5 * epoch_length -
                                     1) + [0.05] * (5 * epoch_length + 1)
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ConstantLR.build_iter_from_epoch(
             self.optimizer, factor=1.0 / 2, end=5, epoch_length=epoch_length)
@@ -422,7 +448,9 @@ class TestLRScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs * epoch_length - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearLR.build_iter_from_epoch(
             self.optimizer,
             start_factor=start_factor,
@@ -438,8 +466,7 @@ class TestLRScheduler(TestCase):
             0.05 * (0.9**x) for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ExponentialLR.build_iter_from_epoch(
             self.optimizer, gamma=0.9, epoch_length=epoch_length)
@@ -456,8 +483,7 @@ class TestLRScheduler(TestCase):
             for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = CosineAnnealingLR.build_iter_from_epoch(
             self.optimizer,
@@ -474,15 +500,17 @@ class TestLRScheduler(TestCase):
         epoch_length = 11
 
         iters = end * epoch_length - 1
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
-        ]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyLR.build_iter_from_epoch(
             self.optimizer,
             power=power,
@@ -496,7 +524,9 @@ class TestLRScheduler(TestCase):
         epochs = 12
         single_targets = [0.025, 0.03125, 0.0375, 0.04375
                           ] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearLR(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepLR(
@@ -515,7 +545,9 @@ class TestLRScheduler(TestCase):
             (1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
         ]
         single_targets = single_targets1 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingLR(
             self.optimizer, T_max=5, eta_min=eta_min, begin=5, end=10)
 
@@ -526,7 +558,9 @@ class TestLRScheduler(TestCase):
         epochs = 10
         single_targets = [0.025, 0.03125, 0.0375, 0.004375
                           ] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearLR(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepLR(
@@ -547,7 +581,9 @@ class TestLRScheduler(TestCase):
         ]
         single_targets = single_targets1 + [single_targets1[-1]
                                             ] * 5 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingLR(
             self.optimizer, T_max=5, eta_min=eta_min, begin=10, end=15)
 
@@ -34,12 +34,38 @@ class TestMomentumScheduler(TestCase):
         tearDown() -> cleanUp()
         """
         self.model = ToyModel()
-        self.optimizer = optim.SGD(
-            self.model.parameters(), lr=0.01, momentum=0.05, weight_decay=5e-4)
+        momentum = 0.05
+        self.layer2_mult = 10
+        self.optimizer = optim.SGD([{
+            'params': self.model.conv1.parameters()
+        }, {
+            'params': self.model.conv2.parameters(),
+            'momentum': momentum * self.layer2_mult
+        }],
+                                   lr=0.01,
+                                   momentum=momentum,
+                                   weight_decay=5e-4)
+        self.optimizer_with_betas = optim.Adam(
+            [{
+                'params': self.model.conv1.parameters()
+            }, {
+                'params': self.model.conv2.parameters(),
+                'betas': (momentum * self.layer2_mult, 0.999)
+            }],
+            lr=0.01,
+            betas=(momentum, 0.999),
+            weight_decay=5e-4)
 
     def test_invalid_optimizer(self):
         with self.assertRaisesRegex(TypeError, 'should be an Optimizer'):
             StepMomentum('invalid_optimizer', step_size=1)
+        with self.assertRaisesRegex(
+                ValueError,
+                'optimizer must support momentum when using momentum scheduler'
+        ):
+            optimizer = optim.ASGD(
+                self.model.parameters(),
+                lr=0.01,
+            )
+            StepMomentum(optimizer, step_size=1)
 
     def test_overwrite_optimzer_step(self):
         # raise warning if the counter in optimizer.step() is overwritten
@@ -66,21 +92,19 @@ class TestMomentumScheduler(TestCase):
 
         results = []
         for epoch in range(5):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['momentum'])
-            # The order should be
-            # train_epoch() -> save_checkpoint() -> scheduler.step().
-            # Break at here to simulate the checkpoint is saved before
-            # the scheduler.step().
-            if epoch == 4:
-                break
-            scheduler.step()
+            results.append(self.optimizer.param_groups[0]['momentum'])
+            # The order should be
+            # train_epoch() -> save_checkpoint() -> scheduler.step().
+            # Break at here to simulate the checkpoint is saved before
+            # the scheduler.step().
+            if epoch == 4:
+                break
+            scheduler.step()
         scheduler2 = ExponentialMomentum(
             self.optimizer, gamma=0.9, last_step=4)
         for epoch in range(6):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['momentum'])
-            scheduler2.step()
+            results.append(self.optimizer.param_groups[0]['momentum'])
+            scheduler2.step()
 
         for epoch in range(epochs):
             assert_allclose(
@@ -119,7 +143,10 @@ class TestMomentumScheduler(TestCase):
 
     def test_get_last_value(self):
         epochs = 10
-        targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+        targets = [
+            single_targets, [t * self.layer2_mult for t in single_targets]
+        ]
         scheduler = StepMomentum(self.optimizer, 3, gamma=0.1)
         for epoch in range(epochs):
             result = scheduler.get_last_value()
@@ -170,15 +197,18 @@ class TestMomentumScheduler(TestCase):
         single_targets = [0.05] * begin + [x * 0.05
                                            for x in interpolation] + [0.05] * (
                                                epochs - iters - begin)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearMomentum(
             self.optimizer,
             start_factor=start_factor,
             begin=begin,
             end=begin + iters + 1)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
 
     def _test_scheduler_value(self,
+                              optimizer,
                               schedulers,
                               targets,
                               epochs=10,
@@ -186,8 +216,7 @@ class TestMomentumScheduler(TestCase):
         if isinstance(schedulers, _ParamScheduler):
             schedulers = [schedulers]
         for epoch in range(epochs):
-            for param_group, target in zip(self.optimizer.param_groups,
-                                           targets):
+            for param_group, target in zip(optimizer.param_groups, targets):
                 assert_allclose(
                     target[epoch],
                     param_group[param_name],
@@ -196,6 +225,15 @@ class TestMomentumScheduler(TestCase):
                         param_group[param_name]),
                     atol=1e-5,
                     rtol=0)
+                if 'betas' in optimizer.defaults:
+                    assert_allclose(
+                        target[epoch],
+                        param_group['betas'][0],
+                        msg='{} is wrong in epoch {}: expected {}, got {}'.
+                        format('betas_0', epoch, target[epoch],
+                               param_group['betas'][0]),
+                        atol=1e-5,
+                        rtol=0)
             [scheduler.step() for scheduler in schedulers]
 
     def test_step_scheduler(self):
@@ -206,10 +244,17 @@ class TestMomentumScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepMomentum(
             self.optimizer, gamma=0.1, step_size=3, verbose=True)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = StepMomentum(
+            self.optimizer_with_betas, gamma=0.1, step_size=3, verbose=True)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_multi_step_scheduler(self):
         # momentum = 0.05 if epoch < 2
@@ -219,10 +264,17 @@ class TestMomentumScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = MultiStepMomentum(
             self.optimizer, gamma=0.1, milestones=[2, 5, 9])
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = MultiStepMomentum(
+            self.optimizer_with_betas, gamma=0.1, milestones=[2, 5, 9])
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_constant_scheduler(self):
         # factor should between 0~1
@@ -233,9 +285,16 @@ class TestMomentumScheduler(TestCase):
         # momentum = 0.005 if 5 <= epoch
         epochs = 10
         single_targets = [0.025] * 4 + [0.05] * 6
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ConstantMomentum(self.optimizer, factor=1.0 / 2, end=5)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = ConstantMomentum(
+            self.optimizer_with_betas, factor=1.0 / 2, end=5)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_linear_scheduler(self):
         with self.assertRaises(ValueError):
@@ -259,17 +318,32 @@ class TestMomentumScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearMomentum(
             self.optimizer, start_factor=start_factor, end=iters + 1)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = LinearMomentum(
+            self.optimizer_with_betas,
+            start_factor=start_factor,
+            end=iters + 1)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_exp_scheduler(self):
         epochs = 10
         single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ExponentialMomentum(self.optimizer, gamma=0.9)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = ExponentialMomentum(self.optimizer_with_betas, gamma=0.9)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_cos_anneal_scheduler(self):
         epochs = 12
@@ -279,25 +353,46 @@ class TestMomentumScheduler(TestCase):
             eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
             for x in range(epochs)
         ]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = CosineAnnealingMomentum(
             self.optimizer, T_max=t, eta_min=eta_min)
-        self._test_scheduler_value(scheduler, targets, epochs)
+        self._test_scheduler_value(self.optimizer, scheduler, targets, epochs)
+
+        scheduler = CosineAnnealingMomentum(
+            self.optimizer_with_betas, T_max=t, eta_min=eta_min)
+        self._test_scheduler_value(self.optimizer_with_betas, scheduler,
+                                   targets, epochs)
 
     def test_poly_scheduler(self):
         epochs = 10
         power = 0.9
         min_lr = 0.001
         iters = 4
-        single_targets = [
+        layer1_targets = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        layer2_targets = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [layer1_targets, layer2_targets]
         scheduler = PolyMomentum(
             self.optimizer, power=power, eta_min=min_lr, end=iters + 1)
-        self._test_scheduler_value(scheduler, targets, epochs=10)
+        self._test_scheduler_value(
+            self.optimizer, scheduler, targets, epochs=10)
+
+        scheduler = PolyMomentum(
+            self.optimizer_with_betas,
+            power=power,
+            eta_min=min_lr,
+            end=iters + 1)
+        self._test_scheduler_value(
+            self.optimizer_with_betas, scheduler, targets, epochs=10)
 
     def _check_scheduler_state_dict(self, construct, construct2, epochs=10):
         scheduler = construct()
@@ -359,12 +454,15 @@ class TestMomentumScheduler(TestCase):
         epochs = 12
         single_targets = [0.025, 0.03125, 0.0375, 0.04375
                           ] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearMomentum(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepMomentum(
             self.optimizer, gamma=0.1, milestones=[3, 6], begin=5, end=12)
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
 
     def test_multi_scheduler_without_overlap_exp_cosine(self):
         # use Exp in the first 5 epochs and then use Cosine
@@ -379,23 +477,29 @@ class TestMomentumScheduler(TestCase):
             (1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
         ]
         single_targets = single_targets1 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingMomentum(
             self.optimizer, T_max=5, eta_min=eta_min, begin=5, end=10)
 
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
 
     def test_multi_scheduler_with_overlap(self):
         # use Linear at first 5 epochs together with MultiStep
         epochs = 10
         single_targets = [0.025, 0.03125, 0.0375, 0.004375
                           ] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearMomentum(
             self.optimizer, start_factor=1 / 2, begin=0, end=5)
         scheduler2 = MultiStepMomentum(
             self.optimizer, gamma=0.1, milestones=[3, 6, 9])
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
 
     def test_multi_scheduler_with_gap(self):
         # use Exp in the first 5 epochs and the last 5 epochs use Cosine
@@ -412,8 +516,11 @@ class TestMomentumScheduler(TestCase):
         ]
         single_targets = single_targets1 + [single_targets1[-1]
                                             ] * 5 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingMomentum(
             self.optimizer, T_max=5, eta_min=eta_min, begin=10, end=15)
 
-        self._test_scheduler_value([scheduler1, scheduler2], targets, epochs)
+        self._test_scheduler_value(self.optimizer, [scheduler1, scheduler2],
+                                   targets, epochs)
@@ -39,8 +39,22 @@ class TestParameterScheduler(TestCase):
         tearDown() -> cleanUp()
         """
         self.model = ToyModel()
+        self.layer2_mult = 10
+        lr = 0.05
+        momentum = 0.01
+        weight_decay = 5e-4
         self.optimizer = optim.SGD(
-            self.model.parameters(), lr=0.05, momentum=0.01, weight_decay=5e-4)
+            [{
+                'params': self.model.conv1.parameters()
+            }, {
+                'params': self.model.conv2.parameters(),
+                'lr': lr * self.layer2_mult,
+                'momentum': momentum * self.layer2_mult,
+                'weight_decay': weight_decay * self.layer2_mult
+            }],
+            lr=lr,
+            momentum=momentum,
+            weight_decay=weight_decay)
 
     def test_base_scheduler_step(self):
         with self.assertRaises(NotImplementedError):
@@ -83,21 +97,19 @@ class TestParameterScheduler(TestCase):
 
         results = []
         for epoch in range(5):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
-            # The order should be
-            # train_epoch() -> save_checkpoint() -> scheduler.step().
-            # Break at here to simulate the checkpoint is saved before
-            # the scheduler.step().
-            if epoch == 4:
-                break
-            scheduler.step()
+            results.append(self.optimizer.param_groups[0]['lr'])
+            # The order should be
+            # train_epoch() -> save_checkpoint() -> scheduler.step().
+            # Break at here to simulate the checkpoint is saved before
+            # the scheduler.step().
+            if epoch == 4:
+                break
+            scheduler.step()
         scheduler2 = ExponentialParamScheduler(
             self.optimizer, param_name='lr', gamma=0.9, last_step=4)
         for epoch in range(6):
-            for param_group in self.optimizer.param_groups:
-                results.append(param_group['lr'])
-            scheduler2.step()
+            results.append(self.optimizer.param_groups[0]['lr'])
+            scheduler2.step()
 
         for epoch in range(epochs):
             assert_allclose(
@@ -141,7 +153,10 @@ class TestParameterScheduler(TestCase):
 
     def test_get_last_value(self):
         epochs = 10
-        targets = [[0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]]
+        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepParamScheduler(
             self.optimizer, param_name='lr', step_size=3, gamma=0.1)
         for epoch in range(epochs):
@@ -199,7 +214,9 @@ class TestParameterScheduler(TestCase):
         single_targets = [0.05] * begin + [x * 0.05
                                            for x in interpolation] + [0.05] * (
                                                epochs - iters - begin)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -241,7 +258,9 @@ class TestParameterScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -254,7 +273,9 @@ class TestParameterScheduler(TestCase):
         # momentum = 0.001 if 2 <= epoch < 4
         epochs = 4
         single_targets = [0.01] * 2 + [0.001] * 2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = StepParamScheduler(
             self.optimizer, param_name='momentum', gamma=0.1, step_size=2)
         self._test_scheduler_value(
@@ -268,7 +289,9 @@ class TestParameterScheduler(TestCase):
         epochs = 10
         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005
                                                                     ] * 3
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = MultiStepParamScheduler(
             self.optimizer, param_name='lr', gamma=0.1, milestones=[2, 5, 9])
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -282,7 +305,9 @@ class TestParameterScheduler(TestCase):
         # lr = 0.005 if 5 <= epoch
         epochs = 10
         single_targets = [0.025] * 4 + [0.05] * 6
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ConstantParamScheduler(
             self.optimizer, param_name='lr', factor=1.0 / 2, end=5)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -313,7 +338,9 @@ class TestParameterScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -324,7 +351,9 @@ class TestParameterScheduler(TestCase):
     def test_exp_scheduler(self):
         epochs = 10
         single_targets = [0.05 * (0.9**x) for x in range(epochs)]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = ExponentialParamScheduler(
             self.optimizer, param_name='lr', gamma=0.9)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -337,7 +366,9 @@ class TestParameterScheduler(TestCase):
             eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / t)) / 2
             for x in range(epochs)
         ]
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = CosineAnnealingParamScheduler(
             self.optimizer, param_name='lr', T_max=t, eta_min=eta_min)
         self._test_scheduler_value(scheduler, targets, epochs)
@@ -347,12 +378,17 @@ class TestParameterScheduler(TestCase):
         power = 0.9
         min_lr = 0.001
         iters = 4
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -451,8 +487,7 @@ class TestParameterScheduler(TestCase):
         epoch_length = 7
         single_targets = [0.01] * 2 * epoch_length + [0.001] * 2 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = StepParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -475,8 +510,7 @@ class TestParameterScheduler(TestCase):
             0.0005
         ] * 4 * epoch_length + [0.00005] * 3 * epoch_length
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = MultiStepParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -494,8 +528,7 @@ class TestParameterScheduler(TestCase):
         single_targets = [0.025] * (5 * epoch_length -
                                     1) + [0.05] * (5 * epoch_length + 1)
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ConstantParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -517,7 +550,9 @@ class TestParameterScheduler(TestCase):
         ]
         single_targets = [x * 0.05 for x in interpolation] + [0.05] * (
             epochs * epoch_length - iters)
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler = LinearParamScheduler.build_iter_from_epoch(
             self.optimizer,
             param_name='lr',
@@ -534,8 +569,7 @@ class TestParameterScheduler(TestCase):
             0.05 * (0.9**x) for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = ExponentialParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -555,8 +589,7 @@ class TestParameterScheduler(TestCase):
             for x in range(epochs * epoch_length)
         ]
         targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
+            single_targets, [x * self.layer2_mult for x in single_targets]
         ]
         scheduler = CosineAnnealingParamScheduler.build_iter_from_epoch(
             self.optimizer,
@@ -574,15 +607,17 @@ class TestParameterScheduler(TestCase):
         epoch_length = 11
 
         iters = end * epoch_length - 1
-        single_targets = [
+        targets_layer1 = [
             min_lr + (0.05 - min_lr) * (1 - i / iters)**power
             for i in range(iters)
         ] + [min_lr] * (
             epochs - iters)
-        targets = [
-            single_targets,
-            [x * epochs * epoch_length for x in single_targets]
-        ]
+        targets_layer2 = [
+            min_lr + (0.05 * self.layer2_mult - min_lr) *
+            (1 - i / iters)**power for i in range(iters)
+        ] + [min_lr] * (
+            epochs - iters)
+        targets = [targets_layer1, targets_layer2]
         scheduler = PolyParamScheduler.build_iter_from_epoch(
             self.optimizer,
             param_name='lr',
@@ -597,7 +632,9 @@ class TestParameterScheduler(TestCase):
         epochs = 12
         single_targets = [0.025, 0.03125, 0.0375, 0.04375
                           ] + [0.05] * 4 + [0.005] * 3 + [0.0005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -626,7 +663,9 @@ class TestParameterScheduler(TestCase):
             (1 + math.cos(math.pi * x / 5)) / 2 for x in range(5)
         ]
         single_targets = single_targets1 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -642,7 +681,9 @@ class TestParameterScheduler(TestCase):
         epochs = 10
         single_targets = [0.025, 0.03125, 0.0375, 0.004375
                           ] + [0.005] * 2 + [0.0005] * 3 + [0.00005] * 1
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler1 = LinearParamScheduler(
             self.optimizer,
             param_name='lr',
@@ -668,7 +709,9 @@ class TestParameterScheduler(TestCase):
         ]
         single_targets = single_targets1 + [single_targets1[-1]
                                             ] * 5 + single_targets2
-        targets = [single_targets, [x * epochs for x in single_targets]]
+        targets = [
+            single_targets, [x * self.layer2_mult for x in single_targets]
+        ]
         scheduler2 = CosineAnnealingParamScheduler(
             self.optimizer,
             param_name='lr',