mirror of https://github.com/open-mmlab/mmcv.git
Add decay to CyclicLrUpdater (#1655)
* Add decay to cyclic LR
* Simplify tests
* Fix error in tests
* fix cyclic lr
* add weight decay CyclicLrUpdater test
* Update mmcv/runner/hooks/lr_updater.py: update docstring
* update CyclicLrUpdater unit test
* add comments to CyclicLrUpdaterHook
* fix CyclicLrUpdater. Support peak_lr decay when target[0] < 1
* add momentum decay to CyclicMomentumUpdater
* update momentum unit test
* Fix CyclicMomentum comment

Co-authored-by: Dmytro Panchenko <panchenkodmytry@gmail.com>
Co-authored-by: Zaida Zhou <58739961+zhouzaida@users.noreply.github.com>
parent 0448fcf906
commit 580e374eae
mmcv/runner/hooks/lr_updater.py:

```diff
@@ -420,15 +420,19 @@ class CyclicLrUpdaterHook(LrUpdaterHook):
     3D detection area.
 
     Args:
-        by_epoch (bool): Whether to update LR by epoch.
-        target_ratio (tuple[float]): Relative ratio of the highest LR and the
-            lowest LR to the initial LR.
-        cyclic_times (int): Number of cycles during training
-        step_ratio_up (float): The ratio of the increasing process of LR in
-            the total cycle.
-        anneal_strategy (str): {'cos', 'linear'}
+        by_epoch (bool, optional): Whether to update LR by epoch.
+        target_ratio (tuple[float], optional): Relative ratio of the highest LR
+            and the lowest LR to the initial LR.
+        cyclic_times (int, optional): Number of cycles during training
+        step_ratio_up (float, optional): The ratio of the increasing process of
+            LR in the total cycle.
+        anneal_strategy (str, optional): {'cos', 'linear'}
             Specifies the annealing strategy: 'cos' for cosine annealing,
             'linear' for linear annealing. Default: 'cos'.
+        gamma (float, optional): Cycle decay ratio. Default: 1.
+            It takes values in the range (0, 1]. The difference between the
+            maximum learning rate and the minimum learning rate decreases
+            periodically when it is less than 1. `New in version 1.4.4.`
     """
 
     def __init__(self,
```
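The key behaviour of the new `gamma` argument: the LR span shrinks by `scale = gamma**cycle` each cycle. A minimal sketch of the resulting peak LR per cycle, using the identity from the new `get_lr()` further down; the values are illustrative, not from the diff:

```python
# peak_lr(cycle) = base_lr * (1 - scale + target_ratio[0] * scale),
# where scale = gamma**cycle; base_lr and max_ratio are assumed values.
base_lr, max_ratio = 0.1, 10.0

for gamma in (1.0, 0.5):
    peaks = []
    for cycle in range(4):
        scale = gamma**cycle
        peaks.append(base_lr * (1 - scale + max_ratio * scale))
    print(gamma, [round(p, 4) for p in peaks])
# 1.0 [1.0, 1.0, 1.0, 1.0]        -> gamma=1 keeps the old behaviour
# 0.5 [1.0, 0.55, 0.325, 0.2125]  -> the peak decays toward base_lr
```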
```diff
@@ -437,6 +441,7 @@ class CyclicLrUpdaterHook(LrUpdaterHook):
                  cyclic_times=1,
                  step_ratio_up=0.4,
                  anneal_strategy='cos',
+                 gamma=1,
                  **kwargs):
         if isinstance(target_ratio, float):
             target_ratio = (target_ratio, target_ratio / 1e5)
```
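With the argument wired into `__init__`, a training config could enable the decay roughly as follows, assuming mmcv's usual `policy` lookup (`policy='cyclic'` resolves to `CyclicLrUpdaterHook`); the values are illustrative:

```python
lr_config = dict(
    policy='cyclic',
    by_epoch=False,
    target_ratio=(10, 1e-4),  # (highest, lowest) LR relative to the initial LR
    cyclic_times=2,           # two full cycles over the run
    step_ratio_up=0.4,        # 40% of each cycle spent ramping up
    gamma=0.5)                # halve the LR span after every cycle
```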
```diff
@@ -451,10 +456,14 @@ class CyclicLrUpdaterHook(LrUpdaterHook):
             '"target_ratio" must be list or tuple of two floats'
         assert 0 <= step_ratio_up < 1.0, \
             '"step_ratio_up" must be in range [0,1)'
+        assert 0 < gamma <= 1, \
+            '"gamma" must be in range (0, 1]'
 
         self.target_ratio = target_ratio
         self.cyclic_times = cyclic_times
         self.step_ratio_up = step_ratio_up
+        self.gamma = gamma
+        self.max_iter_per_phase = None
         self.lr_phases = []  # init lr_phases
         # validate anneal_strategy
         if anneal_strategy not in ['cos', 'linear']:
@@ -473,21 +482,32 @@ class CyclicLrUpdaterHook(LrUpdaterHook):
         super(CyclicLrUpdaterHook, self).before_run(runner)
         # initiate lr_phases
         # total lr_phases are separated as up and down
-        max_iter_per_phase = runner.max_iters // self.cyclic_times
-        iter_up_phase = int(self.step_ratio_up * max_iter_per_phase)
-        self.lr_phases.append(
-            [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]])
+        self.max_iter_per_phase = runner.max_iters // self.cyclic_times
+        iter_up_phase = int(self.step_ratio_up * self.max_iter_per_phase)
+        self.lr_phases.append([0, iter_up_phase, 1, self.target_ratio[0]])
         self.lr_phases.append([
-            iter_up_phase, max_iter_per_phase, max_iter_per_phase,
-            self.target_ratio[0], self.target_ratio[1]
+            iter_up_phase, self.max_iter_per_phase, self.target_ratio[0],
+            self.target_ratio[1]
         ])
 
     def get_lr(self, runner, base_lr):
-        curr_iter = runner.iter
-        for (start_iter, end_iter, max_iter_per_phase, start_ratio,
-             end_ratio) in self.lr_phases:
-            curr_iter %= max_iter_per_phase
+        curr_iter = runner.iter % self.max_iter_per_phase
+        curr_cycle = runner.iter // self.max_iter_per_phase
+        # per-cycle decay scale for the LR span
+        scale = self.gamma**curr_cycle
+
+        for (start_iter, end_iter, start_ratio, end_ratio) in self.lr_phases:
             if start_iter <= curr_iter < end_iter:
+                # Apply cycle scaling to gradually reduce the difference
+                # between max_lr and base lr. The target end_ratio can be
+                # expressed as:
+                # end_ratio = (base_lr + scale * (max_lr - base_lr)) / base_lr
+                # iteration: 0-iter_up_phase:
                 if start_iter == 0:
+                    end_ratio = 1 - scale + end_ratio * scale
+                # iteration: iter_up_phase-self.max_iter_per_phase
+                else:
+                    start_ratio = 1 - scale + start_ratio * scale
                 progress = curr_iter - start_iter
                 return self.anneal_func(base_lr * start_ratio,
                                         base_lr * end_ratio,
                                         progress / (end_iter - start_iter))
```
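Putting both hunks together, a self-contained sketch of the new schedule. It assumes mmcv's `annealing_linear` (`start + (end - start) * factor`) and a base LR of 0.02, which is what the demo runner in the tests below appears to use; the remaining settings match the updated `test_cyclic_lr_update_hook` case:

```python
def annealing_linear(start, end, factor):
    # linear interpolation, as assumed from mmcv
    return start + (end - start) * factor

def cyclic_lr(it, base_lr=0.02, target_ratio=(10.0, 1.0), max_iters=8,
              cyclic_times=2, step_ratio_up=0.5, gamma=0.5):
    # mirrors before_run() + get_lr() from the diff above
    max_iter_per_phase = max_iters // cyclic_times
    iter_up_phase = int(step_ratio_up * max_iter_per_phase)
    phases = [[0, iter_up_phase, 1, target_ratio[0]],
              [iter_up_phase, max_iter_per_phase, target_ratio[0],
               target_ratio[1]]]
    curr_iter = it % max_iter_per_phase
    scale = gamma**(it // max_iter_per_phase)
    for start_iter, end_iter, start_ratio, end_ratio in phases:
        if start_iter <= curr_iter < end_iter:
            if start_iter == 0:
                end_ratio = 1 - scale + end_ratio * scale
            else:
                start_ratio = 1 - scale + start_ratio * scale
            progress = curr_iter - start_iter
            return annealing_linear(base_lr * start_ratio,
                                    base_lr * end_ratio,
                                    progress / (end_iter - start_iter))

print([round(cyclic_lr(i), 3) for i in range(8)])
# [0.02, 0.11, 0.2, 0.11, 0.02, 0.065, 0.11, 0.065]
# Iters 3, 5 and 6 match the 0.11 / 0.065 / 0.11 expected by the updated
# test below (logged there as calls 4, 6 and 7).
```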
mmcv/runner/hooks/momentum_updater.py:

```diff
@@ -239,6 +239,13 @@ class CyclicMomentumUpdaterHook(MomentumUpdaterHook):
         step_ratio_up (float): The ratio of the increasing process of momentum
             in the total cycle.
         by_epoch (bool): Whether to update momentum by epoch.
+        anneal_strategy (str, optional): {'cos', 'linear'}
+            Specifies the annealing strategy: 'cos' for cosine annealing,
+            'linear' for linear annealing. Default: 'cos'.
+        gamma (float, optional): Cycle decay ratio. Default: 1.
+            It takes values in the range (0, 1]. The difference between the
+            maximum momentum and the minimum momentum decreases
+            periodically when it is less than 1. `New in version 1.4.4.`
     """
 
     def __init__(self,
@@ -246,6 +253,8 @@ class CyclicMomentumUpdaterHook(MomentumUpdaterHook):
                  target_ratio=(0.85 / 0.95, 1),
                  cyclic_times=1,
                  step_ratio_up=0.4,
+                 anneal_strategy='cos',
+                 gamma=1,
                  **kwargs):
         if isinstance(target_ratio, float):
             target_ratio = (target_ratio, target_ratio / 1e5)
@@ -264,7 +273,16 @@ class CyclicMomentumUpdaterHook(MomentumUpdaterHook):
         self.target_ratio = target_ratio
         self.cyclic_times = cyclic_times
         self.step_ratio_up = step_ratio_up
+        self.gamma = gamma
         self.momentum_phases = []  # init momentum_phases
+
+        if anneal_strategy not in ['cos', 'linear']:
+            raise ValueError('anneal_strategy must be one of "cos" or '
+                             f'"linear", instead got {anneal_strategy}')
+        elif anneal_strategy == 'cos':
+            self.anneal_func = annealing_cos
+        elif anneal_strategy == 'linear':
+            self.anneal_func = annealing_linear
         # currently only support by_epoch=False
         assert not by_epoch, \
             'currently only support "by_epoch" = False'
```
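For reference, the two annealing helpers selected here, as defined in mmcv's `lr_updater` module (reproduced from memory; `annealing_cos` also accepts an optional `weight` argument, default 1, omitted here):

```python
import math

def annealing_cos(start, end, factor):
    # cosine-anneal from `start` to `end` as `factor` goes 0 -> 1
    cos_out = math.cos(math.pi * factor) + 1
    return end + 0.5 * (start - end) * cos_out

def annealing_linear(start, end, factor):
    # linearly anneal from `start` to `end` as `factor` goes 0 -> 1
    return start + (end - start) * factor
```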
```diff
@@ -276,23 +294,36 @@ class CyclicMomentumUpdaterHook(MomentumUpdaterHook):
         # total momentum_phases are separated as up and down
         max_iter_per_phase = runner.max_iters // self.cyclic_times
         iter_up_phase = int(self.step_ratio_up * max_iter_per_phase)
+        self.max_iter_per_phase = max_iter_per_phase
         self.momentum_phases.append(
-            [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]])
+            [0, iter_up_phase, 1, self.target_ratio[0]])
         self.momentum_phases.append([
-            iter_up_phase, max_iter_per_phase, max_iter_per_phase,
-            self.target_ratio[0], self.target_ratio[1]
+            iter_up_phase, max_iter_per_phase, self.target_ratio[0],
+            self.target_ratio[1]
         ])
 
     def get_momentum(self, runner, base_momentum):
-        curr_iter = runner.iter
-        for (start_iter, end_iter, max_iter_per_phase, start_ratio,
-             end_ratio) in self.momentum_phases:
-            curr_iter %= max_iter_per_phase
+        curr_iter = runner.iter % self.max_iter_per_phase
+        curr_cycle = runner.iter // self.max_iter_per_phase
+        scale = self.gamma**curr_cycle
+        for (start_iter, end_iter, start_ratio, end_ratio) \
+                in self.momentum_phases:
             if start_iter <= curr_iter < end_iter:
+                # Apply cycle scaling to gradually reduce the difference
+                # between max_momentum and base momentum. The target end_ratio
+                # can be expressed as:
+                # end_ratio = (base_momentum + scale *
+                #     (max_momentum - base_momentum)) / base_momentum
+                # iteration: 0-iter_up_phase:
+                if start_iter == 0:
+                    end_ratio = 1 - scale + end_ratio * scale
+                # iteration: iter_up_phase-self.max_iter_per_phase
+                else:
+                    start_ratio = 1 - scale + start_ratio * scale
                 progress = curr_iter - start_iter
-                return annealing_cos(base_momentum * start_ratio,
-                                     base_momentum * end_ratio,
-                                     progress / (end_iter - start_iter))
+                return self.anneal_func(base_momentum * start_ratio,
+                                        base_momentum * end_ratio,
+                                        progress / (end_iter - start_iter))
 
 
 @HOOKS.register_module()
```
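The momentum schedule runs inversely to the LR schedule. A quick check of the default `target_ratio=(0.85 / 0.95, 1)` against a base momentum of 0.95 (an assumption taken from the expected values in the tests below), including how `gamma` shrinks the dip:

```python
base_momentum = 0.95
down_ratio = 0.85 / 0.95           # target_ratio[0]: dip while the LR rises

print(base_momentum * down_ratio)  # 0.85: the dip in the first cycle (scale=1)

scale = 0.5**1                     # second cycle with gamma=0.5
dipped_ratio = 1 - scale + down_ratio * scale
print(base_momentum * dipped_ratio)  # ~0.9: the 'momentum': 0.9 the test expects
```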
tests/test_hooks.py:

```diff
@@ -354,20 +354,23 @@ def test_sync_buffers_hook():
     shutil.rmtree(runner.work_dir)
 
 
-@pytest.mark.parametrize('multi_optimziers', (True, False))
-def test_momentum_runner_hook(multi_optimziers):
+@pytest.mark.parametrize('multi_optimizers, max_iters, gamma, cyclic_times',
+                         [(True, 8, 1, 1), (False, 8, 0.5, 2)])
+def test_momentum_runner_hook(multi_optimizers, max_iters, gamma,
+                              cyclic_times):
     """xdoctest -m tests/test_hooks.py test_momentum_runner_hook."""
     sys.modules['pavi'] = MagicMock()
     loader = DataLoader(torch.ones((10, 2)))
-    runner = _build_demo_runner(multi_optimziers=multi_optimziers)
+    runner = _build_demo_runner(multi_optimziers=multi_optimizers)
 
     # add momentum scheduler
     hook_cfg = dict(
         type='CyclicMomentumUpdaterHook',
         by_epoch=False,
         target_ratio=(0.85 / 0.95, 1),
-        cyclic_times=1,
-        step_ratio_up=0.4)
+        cyclic_times=cyclic_times,
+        step_ratio_up=0.4,
+        gamma=gamma)
     runner.register_hook_from_cfg(hook_cfg)
 
     # add momentum LR scheduler
```
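The two parametrized cases cover the old behaviour (`gamma=1`, one cycle) and the new decay (`gamma=0.5`, two cycles). A sketch of how the second case splits its 8 iterations, following the hook logic above:

```python
max_iters, cyclic_times, gamma = 8, 2, 0.5
max_iter_per_phase = max_iters // cyclic_times   # 4 iterations per cycle
for it in range(max_iters):
    cycle = it // max_iter_per_phase
    print(it, cycle, gamma**cycle)  # scale: 1.0 for iters 0-3, 0.5 for 4-7
```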
```diff
@@ -388,7 +391,7 @@ def test_momentum_runner_hook(multi_optimziers):
 
     # TODO: use a more elegant way to check values
     assert hasattr(hook, 'writer')
-    if multi_optimziers:
+    if multi_optimizers:
         calls = [
             call(
                 'train', {
@@ -419,13 +422,17 @@ def test_momentum_runner_hook(multi_optimziers):
                     'momentum': 0.95
                 }, 1),
                 call('train', {
-                    'learning_rate': 0.2,
+                    'learning_rate': 0.11,
                     'momentum': 0.85
-                }, 5),
+                }, 3),
                 call('train', {
-                    'learning_rate': 0.155,
-                    'momentum': 0.875
-                }, 7),
+                    'learning_rate': 0.1879422863405995,
+                    'momentum': 0.95
+                }, 6),
+                call('train', {
+                    'learning_rate': 0.11000000000000001,
+                    'momentum': 0.9
+                }, 8),
             ]
         hook.writer.add_scalars.assert_has_calls(calls, any_order=True)
```
```diff
@@ -1267,26 +1274,31 @@ def test_step_runner_hook(multi_optimziers):
     hook.writer.add_scalars.assert_has_calls(calls, any_order=True)
 
 
-@pytest.mark.parametrize('multi_optimizers, max_iters', [(True, 8),
-                                                         (False, 8)])
-def test_cyclic_lr_update_hook(multi_optimizers, max_iters):
+@pytest.mark.parametrize('multi_optimizers, max_iters, gamma, cyclic_times',
+                         [(True, 8, 1, 1), (False, 8, 0.5, 2)])
+def test_cyclic_lr_update_hook(multi_optimizers, max_iters, gamma,
+                               cyclic_times):
     """Test CyclicLrUpdateHook."""
     with pytest.raises(AssertionError):
         # by_epoch should be False
         CyclicLrUpdaterHook(by_epoch=True)
 
     with pytest.raises(AssertionError):
-        # target_ratio" must be either float or tuple/list of two floats
+        # target_ratio must be either float or tuple/list of two floats
         CyclicLrUpdaterHook(by_epoch=False, target_ratio=(10.0, 0.1, 0.2))
 
     with pytest.raises(AssertionError):
-        # step_ratio_up" must be in range [0,1)
+        # step_ratio_up must be in range [0,1)
         CyclicLrUpdaterHook(by_epoch=False, step_ratio_up=1.4)
 
     with pytest.raises(ValueError):
         # anneal_strategy must be one of "cos" or "linear"
         CyclicLrUpdaterHook(by_epoch=False, anneal_strategy='sin')
 
+    with pytest.raises(AssertionError):
+        # gamma must be in range (0, 1]
+        CyclicLrUpdaterHook(by_epoch=False, gamma=0)
+
     sys.modules['pavi'] = MagicMock()
     loader = DataLoader(torch.ones((10, 2)))
     runner = _build_demo_runner(
@@ -1296,13 +1308,14 @@ def test_cyclic_lr_update_hook(multi_optimizers, max_iters):
         multi_optimziers=multi_optimizers)
 
     # add cyclic LR scheduler
-    hook = CyclicLrUpdaterHook(
+    schedule_hook = CyclicLrUpdaterHook(
         by_epoch=False,
         target_ratio=(10.0, 1.0),
-        cyclic_times=1,
+        cyclic_times=cyclic_times,
         step_ratio_up=0.5,
-        anneal_strategy='linear')
-    runner.register_hook(hook)
-    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
+        anneal_strategy='linear',
+        gamma=gamma)
+    runner.register_hook(schedule_hook)
+    runner.register_hook(IterTimerHook())
     # add pavi hook
@@ -1343,13 +1356,17 @@ def test_cyclic_lr_update_hook(multi_optimizers, max_iters):
                 'momentum': 0.95
             }, 1),
             call('train', {
-                'learning_rate': 0.155,
+                'learning_rate': 0.11,
                 'momentum': 0.95
             }, 4),
             call('train', {
-                'learning_rate': 0.155,
+                'learning_rate': 0.065,
                 'momentum': 0.95
             }, 6),
+            call('train', {
+                'learning_rate': 0.11,
+                'momentum': 0.95
+            }, 7),
         ]
     hook.writer.add_scalars.assert_has_calls(calls, any_order=True)
```