fast-reid/fastreid/solver/lr_scheduler.py

# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import math
from bisect import bisect_right
from typing import List

import torch
from torch.optim.lr_scheduler import _LRScheduler

__all__ = ["WarmupMultiStepLR", "WarmupCosineAnnealingLR"]


class WarmupMultiStepLR(_LRScheduler):
    """Multi-step learning rate schedule with warmup: the base learning rate
    is multiplied by a warmup factor during the first ``warmup_iters``
    iterations and decayed by ``gamma`` at every milestone.
    """

    def __init__(
            self,
            optimizer: torch.optim.Optimizer,
            milestones: List[int],
            gamma: float = 0.1,
            warmup_factor: float = 0.001,
            warmup_iters: int = 1000,
            warmup_method: str = "linear",
            last_epoch: int = -1,
            **kwargs,
    ):
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        return [
            base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()
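
# Illustrative sketch, not part of the original training code: with
# base_lr = 0.1, warmup_factor = 0.001, warmup_iters = 1000,
# milestones = [4000, 8000] and gamma = 0.1 (hypothetical example values),
# WarmupMultiStepLR yields roughly:
#   iter 0    -> 0.1 * 0.001        = 1e-4   (start of linear warmup)
#   iter 500  -> 0.1 * 0.5005       ~ 5e-2   (halfway through warmup)
#   iter 1000 -> 0.1                         (warmup finished, base lr)
#   iter 4000 -> 0.1 * 0.1          = 1e-2   (first milestone passed)
#   iter 8000 -> 0.1 * 0.1 ** 2     = 1e-3   (second milestone passed)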


class WarmupCosineAnnealingLR(_LRScheduler):
r"""Set the learning rate of each parameter group using a cosine annealing
schedule, where :math:`\eta_{max}` is set to the initial lr and
:math:`T_{cur}` is the number of epochs since the last restart in SGDR:
.. math::
\eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 +
\cos(\frac{T_{cur}}{T_{max}}\pi))
When last_epoch=-1, sets initial lr as lr.
It has been proposed in
`SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this only
implements the cosine annealing part of SGDR, and not the restarts.
Args:
optimizer (Optimizer): Wrapped optimizer.
T_max (int): Maximum number of iterations.
eta_min (float): Minimum learning rate. Default: 0.
last_epoch (int): The index of last epoch. Default: -1.
.. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
https://arxiv.org/abs/1608.03983
"""
    def __init__(
            self,
            optimizer: torch.optim.Optimizer,
            max_iters: int,
            delay_iters: int = 0,
            eta_min_lr: float = 0.0,
            warmup_factor: float = 0.001,
            warmup_iters: int = 1000,
            warmup_method: str = "linear",
            last_epoch: int = -1,
            **kwargs,
    ):
        self.max_iters = max_iters
        self.delay_iters = delay_iters
        self.eta_min_lr = eta_min_lr
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        assert self.delay_iters >= self.warmup_iters, \
            "Scheduler delay iters must be no smaller than warmup iters"
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        if self.last_epoch <= self.warmup_iters:
            warmup_factor = _get_warmup_factor_at_iter(
                self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor,
            )
            return [base_lr * warmup_factor for base_lr in self.base_lrs]
        elif self.last_epoch <= self.delay_iters:
            return self.base_lrs
        else:
            return [
                self.eta_min_lr + (base_lr - self.eta_min_lr) *
                (1 + math.cos(
                    math.pi * (self.last_epoch - self.delay_iters) / (self.max_iters - self.delay_iters))) / 2
                for base_lr in self.base_lrs
            ]
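
# Illustrative sketch, not from the original file: with base_lr = 0.1,
# eta_min_lr = 0, warmup_iters = 1000, delay_iters = 2000 and
# max_iters = 12000 (hypothetical example values), WarmupCosineAnnealingLR
# moves through three phases:
#   iter 0..1000      linear warmup from 0.1 * 0.001 = 1e-4 up to 0.1
#   iter 1000..2000   constant at the base lr 0.1
#   iter 2000..12000  cosine decay; at iter 7000 (halfway through the
#                     annealing window) cos(pi / 2) = 0, so lr = 0.05, and at
#                     iter 12000 cos(pi) = -1, so lr = eta_min_lr = 0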


def _get_warmup_factor_at_iter(
        method: str, iter: int, warmup_iters: int, warmup_factor: float
) -> float:
"""
Return the learning rate warmup factor at a specific iteration.
See https://arxiv.org/abs/1706.02677 for more details.
Args:
method (str): warmup method; either "constant" or "linear".
iter (int): iteration at which to calculate the warmup factor.
warmup_iters (int): the number of warmup iterations.
warmup_factor (float): the base warmup factor (the meaning changes according
to the method used).
Returns:
float: the effective warmup factor at the given iteration.
"""
    if iter >= warmup_iters:
        return 1.0

    if method == "constant":
        return warmup_factor
    elif method == "linear":
        alpha = iter / warmup_iters
        return warmup_factor * (1 - alpha) + alpha
    else:
        raise ValueError("Unknown warmup method: {}".format(method))
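

if __name__ == "__main__":
    # Minimal smoke test, illustrative only: the hyper-parameters below are
    # arbitrary example values, not fastreid defaults. It builds a throwaway
    # SGD optimizer over a single dummy parameter and prints the learning rate
    # produced by WarmupCosineAnnealingLR every 2000 iterations;
    # WarmupMultiStepLR can be exercised the same way.
    dummy_param = torch.nn.Parameter(torch.zeros(1))
    optimizer = torch.optim.SGD([dummy_param], lr=0.1)
    scheduler = WarmupCosineAnnealingLR(
        optimizer,
        max_iters=12000,
        delay_iters=2000,
        eta_min_lr=1e-7,
        warmup_iters=1000,
    )
    for it in range(12001):
        if it % 2000 == 0:
            print("iter {:5d}: lr = {:.6f}".format(it, optimizer.param_groups[0]["lr"]))
        optimizer.step()
        scheduler.step()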