fast-reid/fastreid/solver/lr_scheduler.py

# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import math
from bisect import bisect_right
from typing import List

import torch
from torch.optim.lr_scheduler import _LRScheduler

__all__ = ["WarmupMultiStepLR", "WarmupCosineAnnealingLR"]


class WarmupMultiStepLR(_LRScheduler):
    """Multi-step learning rate schedule with warmup: the base learning rate
    is multiplied by a warmup factor during the first ``warmup_iters``
    iterations and decayed by ``gamma`` at every milestone.
    """

    def __init__(
            self,
            optimizer: torch.optim.Optimizer,
            milestones: List[int],
            gamma: float = 0.1,
            warmup_factor: float = 0.001,
            warmup_iters: int = 1000,
            warmup_method: str = "linear",
            last_epoch: int = -1,
            **kwargs,
    ):
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        return [
            base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()
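
# Illustrative sketch, not part of the original training code: with
# base_lr = 0.1, warmup_factor = 0.001, warmup_iters = 1000,
# milestones = [4000, 8000] and gamma = 0.1 (hypothetical example values),
# WarmupMultiStepLR yields roughly:
#   iter 0    -> 0.1 * 0.001        = 1e-4   (start of linear warmup)
#   iter 500  -> 0.1 * 0.5005       ~ 5e-2   (halfway through warmup)
#   iter 1000 -> 0.1                         (warmup finished, base lr)
#   iter 4000 -> 0.1 * 0.1          = 1e-2   (first milestone passed)
#   iter 8000 -> 0.1 * 0.1 ** 2     = 1e-3   (second milestone passed)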


class WarmupCosineAnnealingLR(_LRScheduler):
r"""Set the learning rate of each parameter group using a cosine annealing
schedule, where :math:`\eta_{max}` is set to the initial lr and
:math:`T_{cur}` is the number of epochs since the last restart in SGDR:
.. math::
\eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 +
\cos(\frac{T_{cur}}{T_{max}}\pi))
When last_epoch=-1, sets initial lr as lr.
It has been proposed in
`SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this only
implements the cosine annealing part of SGDR, and not the restarts.
Args:
optimizer (Optimizer): Wrapped optimizer.
T_max (int): Maximum number of iterations.
eta_min (float): Minimum learning rate. Default: 0.
last_epoch (int): The index of last epoch. Default: -1.
.. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
https://arxiv.org/abs/1608.03983
"""
    def __init__(
            self,
            optimizer: torch.optim.Optimizer,
            max_iters: int,
            delay_iters: int = 0,
            eta_min_lr: float = 0.0,
            warmup_factor: float = 0.001,
            warmup_iters: int = 1000,
            warmup_method: str = "linear",
            last_epoch: int = -1,
            **kwargs,
    ):
        self.max_iters = max_iters
        self.delay_iters = delay_iters
        self.eta_min_lr = eta_min_lr
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        assert self.delay_iters >= self.warmup_iters, \
            "Scheduler delay iters must be no smaller than warmup iters"
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        if self.last_epoch <= self.warmup_iters:
            warmup_factor = _get_warmup_factor_at_iter(
                self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor,
            )
            return [base_lr * warmup_factor for base_lr in self.base_lrs]
        elif self.last_epoch <= self.delay_iters:
            return self.base_lrs
        else:
            return [
                self.eta_min_lr + (base_lr - self.eta_min_lr) *
                (1 + math.cos(
                    math.pi * (self.last_epoch - self.delay_iters) / (self.max_iters - self.delay_iters))) / 2
                for base_lr in self.base_lrs
            ]
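
# Illustrative sketch, not from the original file: with base_lr = 0.1,
# eta_min_lr = 0, warmup_iters = 1000, delay_iters = 2000 and
# max_iters = 12000 (hypothetical example values), WarmupCosineAnnealingLR
# moves through three phases:
#   iter 0..1000      linear warmup from 0.1 * 0.001 = 1e-4 up to 0.1
#   iter 1000..2000   constant at the base lr 0.1
#   iter 2000..12000  cosine decay; at iter 7000 (halfway through the
#                     annealing window) cos(pi / 2) = 0, so lr = 0.05, and at
#                     iter 12000 cos(pi) = -1, so lr = eta_min_lr = 0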


def _get_warmup_factor_at_iter(
        method: str, iter: int, warmup_iters: int, warmup_factor: float
) -> float:
"""
Return the learning rate warmup factor at a specific iteration.
See https://arxiv.org/abs/1706.02677 for more details.
Args:
method (str): warmup method; either "constant" or "linear".
iter (int): iteration at which to calculate the warmup factor.
warmup_iters (int): the number of warmup iterations.
warmup_factor (float): the base warmup factor (the meaning changes according
to the method used).
Returns:
float: the effective warmup factor at the given iteration.
"""
    if iter >= warmup_iters:
        return 1.0

    if method == "constant":
        return warmup_factor
    elif method == "linear":
        alpha = iter / warmup_iters
        return warmup_factor * (1 - alpha) + alpha
    else:
        raise ValueError("Unknown warmup method: {}".format(method))
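

if __name__ == "__main__":
    # Minimal smoke test, illustrative only: the hyper-parameters below are
    # arbitrary example values, not fastreid defaults. It builds a throwaway
    # SGD optimizer over a single dummy parameter and prints the learning rate
    # produced by WarmupCosineAnnealingLR every 2000 iterations;
    # WarmupMultiStepLR can be exercised the same way.
    dummy_param = torch.nn.Parameter(torch.zeros(1))
    optimizer = torch.optim.SGD([dummy_param], lr=0.1)
    scheduler = WarmupCosineAnnealingLR(
        optimizer,
        max_iters=12000,
        delay_iters=2000,
        eta_min_lr=1e-7,
        warmup_iters=1000,
    )
    for it in range(12001):
        if it % 2000 == 0:
            print("iter {:5d}: lr = {:.6f}".format(it, optimizer.param_groups[0]["lr"]))
        optimizer.step()
        scheduler.step()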