# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from paddle.optimizer import lr
from paddle.optimizer.lr import LRScheduler

from ppcls.utils import logger


class Linear(object):
    """
    Linear learning rate decay (polynomial decay with ``power=1.0`` by default)
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        epochs (int): Total training epochs; the decay runs over ``epochs - warmup_epoch`` epochs.
        step_each_epoch (int): The number of steps in each epoch.
        end_lr (float, optional): The minimum final learning rate. Default: 0.0.
        power (float, optional): Power of the polynomial. Default: 1.0.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of the warm up. Default: 0.0.
        last_epoch (int, optional): The index of the last epoch. Can be set to restart training. Default: -1, meaning the initial learning rate.
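    Examples:
        A minimal usage sketch; the numbers below are illustrative, not values
        prescribed by this module:
        .. code-block:: python

            import paddle
            # linear decay from 0.1 towards 0.0 over 15 epochs, after 5 warmup epochs
            lr_builder = Linear(learning_rate=0.1,
                                epochs=20,
                                step_each_epoch=100,
                                end_lr=0.0,
                                warmup_epoch=5)
            scheduler = lr_builder()  # LinearWarmup wrapping PolynomialDecay
            model = paddle.nn.Linear(10, 10)
            opt = paddle.optimizer.SGD(learning_rate=scheduler,
                                       parameters=model.parameters())
            # call scheduler.step() once per optimizer step during training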
"""

    def __init__(self,
                 learning_rate,
                 epochs,
                 step_each_epoch,
                 end_lr=0.0,
                 power=1.0,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        self.learning_rate = learning_rate
        self.steps = (epochs - warmup_epoch) * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.steps,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch
        ) if self.steps > 0 else self.learning_rate
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Cosine(object):
    """
    Cosine learning rate decay:
    ``lr = eta_min + 0.5 * (learning_rate - eta_min) * (cos(epoch * pi / epochs) + 1)``
    Args:
        learning_rate (float): The initial learning rate.
        step_each_epoch (int): The number of steps in each epoch.
        epochs (int): Total training epochs.
        eta_min (float): Minimum learning rate. Default: 0.0.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of the warm up. Default: 0.0.
        last_epoch (int, optional): The index of the last epoch. Can be set to restart training. Default: -1, meaning the initial learning rate.
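    Examples:
        A minimal usage sketch; the values are illustrative only:
        .. code-block:: python

            import paddle
            lr_builder = Cosine(learning_rate=0.1,
                                step_each_epoch=100,
                                epochs=90,
                                warmup_epoch=5)
            scheduler = lr_builder()  # LinearWarmup wrapping CosineAnnealingDecay
            model = paddle.nn.Linear(10, 10)
            opt = paddle.optimizer.Momentum(learning_rate=scheduler,
                                            momentum=0.9,
                                            parameters=model.parameters())
            # call scheduler.step() once per optimizer step during training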
"""

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 eta_min=0.0,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        self.learning_rate = learning_rate
        self.T_max = (epochs - warmup_epoch) * step_each_epoch
        self.eta_min = eta_min
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.CosineAnnealingDecay(
            learning_rate=self.learning_rate,
            T_max=self.T_max,
            eta_min=self.eta_min,
            last_epoch=self.last_epoch
        ) if self.T_max > 0 else self.learning_rate
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Step(object):
    """
    Step learning rate decay: reduce the learning rate by ``gamma`` every ``step_size`` epochs.
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        step_size (int): The interval, in epochs, at which the learning rate is updated.
        step_each_epoch (int): The number of steps in each epoch.
        epochs (int): Total training epochs.
        gamma (float): The ratio by which the learning rate is reduced: ``new_lr = origin_lr * gamma``.
            It should be less than 1.0.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of the warm up. Default: 0.0.
        last_epoch (int, optional): The index of the last epoch. Can be set to restart training. Default: -1, meaning the initial learning rate.
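    Examples:
        A minimal usage sketch; the values are illustrative only:
        .. code-block:: python

            import paddle
            # reduce the learning rate by 0.1x every 30 epochs
            lr_builder = Step(learning_rate=0.1,
                              step_size=30,
                              step_each_epoch=100,
                              epochs=90,
                              gamma=0.1)
            scheduler = lr_builder()  # paddle.optimizer.lr.StepDecay
            model = paddle.nn.Linear(10, 10)
            opt = paddle.optimizer.SGD(learning_rate=scheduler,
                                       parameters=model.parameters())
            # call scheduler.step() once per optimizer step during training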
"""

    def __init__(self,
                 learning_rate,
                 step_size,
                 step_each_epoch,
                 epochs,
                 gamma,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        self.step_size = step_each_epoch * step_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.StepDecay(
            learning_rate=self.learning_rate,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate


class Piecewise(object):
    """
    Piecewise learning rate decay
    Args:
        step_each_epoch (int): The number of steps in each epoch.
        decay_epochs (list): A list of epoch indices at which the learning rate decays. The type of element in the list is python int.
        values (list): A list of learning rate values to be used between the epoch boundaries.
            The type of element in the list is python float.
        epochs (int): Total training epochs.
        warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0.
        warmup_start_lr (float): Initial learning rate of the warm up. Default: 0.0.
        last_epoch (int, optional): The index of the last epoch. Can be set to restart training. Default: -1, meaning the initial learning rate.
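    Examples:
        A minimal usage sketch; the values are illustrative only:
        .. code-block:: python

            import paddle
            # 0.1 until epoch 30, 0.01 until epoch 60, 0.001 afterwards
            lr_builder = Piecewise(step_each_epoch=100,
                                   decay_epochs=[30, 60],
                                   values=[0.1, 0.01, 0.001],
                                   epochs=90)
            scheduler = lr_builder()  # paddle.optimizer.lr.PiecewiseDecay
            model = paddle.nn.Linear(10, 10)
            opt = paddle.optimizer.SGD(learning_rate=scheduler,
                                       parameters=model.parameters())
            # call scheduler.step() once per optimizer step during training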
"""

    def __init__(self,
                 step_each_epoch,
                 decay_epochs,
                 values,
                 epochs,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
        if warmup_epoch >= epochs:
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than the value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
        self.boundaries = [step_each_epoch * e for e in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
        self.warmup_start_lr = warmup_start_lr

    def __call__(self):
        learning_rate = lr.PiecewiseDecay(
            boundaries=self.boundaries,
            values=self.values,
            last_epoch=self.last_epoch)
        if self.warmup_steps > 0:
            learning_rate = lr.LinearWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_steps,
                start_lr=self.warmup_start_lr,
                end_lr=self.values[0],
                last_epoch=self.last_epoch)
        return learning_rate


class MultiStepDecay(LRScheduler):
    """
    Update the learning rate by ``gamma`` once ``epoch`` reaches one of the milestones.
    The algorithm can be described as the code below.
    .. code-block:: text

        learning_rate = 0.5
        milestones = [30, 50]
        gamma = 0.1
        if epoch < 30:
            learning_rate = 0.5
        elif epoch < 50:
            learning_rate = 0.05
        else:
            learning_rate = 0.005
    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        milestones (tuple|list): List or tuple of milestone epochs. Must be increasing.
        epochs (int): Total training epochs.
        step_each_epoch (int): The number of steps in each epoch; the milestones are given in epochs and converted to steps internally.
        gamma (float, optional): The ratio by which the learning rate is reduced: ``new_lr = origin_lr * gamma``.
            It should be less than 1.0. Default: 0.1.
        last_epoch (int, optional): The index of the last epoch. Can be set to restart training. Default: -1, meaning the initial learning rate.
        verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False``.

    Returns:
        ``MultiStepDecay`` instance to schedule learning rate.
    Examples:
        The snippets below use paddle's built-in ``paddle.optimizer.lr.MultiStepDecay`` directly and are kept for reference.
        .. code-block:: python

            import paddle
            import numpy as np

            # train on default dynamic graph mode
            linear = paddle.nn.Linear(10, 10)
            scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
            for epoch in range(20):
                for batch_id in range(5):
                    x = paddle.uniform([10, 10])
                    out = linear(x)
                    loss = paddle.mean(out)
                    loss.backward()
                    sgd.step()
                    sgd.clear_gradients()
                    scheduler.step()    # If you update learning rate each step
                # scheduler.step()      # If you update learning rate each epoch

            # train on static graph mode
            paddle.enable_static()
            main_prog = paddle.static.Program()
            start_prog = paddle.static.Program()
            with paddle.static.program_guard(main_prog, start_prog):
                x = paddle.static.data(name='x', shape=[None, 4, 5])
                y = paddle.static.data(name='y', shape=[None, 4, 5])
                z = paddle.static.nn.fc(x, 100)
                loss = paddle.mean(z)
                scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
                sgd.minimize(loss)

            exe = paddle.static.Executor()
            exe.run(start_prog)
            for epoch in range(20):
                for batch_id in range(5):
                    out = exe.run(
                        main_prog,
                        feed={
                            'x': np.random.randn(3, 4, 5).astype('float32'),
                            'y': np.random.randn(3, 4, 5).astype('float32')
                        },
                        fetch_list=loss.name)
                    scheduler.step()    # If you update learning rate each step
                # scheduler.step()      # If you update learning rate each epoch
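
        The ``MultiStepDecay`` defined in this module additionally takes ``epochs`` and
        ``step_each_epoch`` and expects ``milestones`` in epochs; a sketch with
        illustrative values:
        .. code-block:: python

            scheduler = MultiStepDecay(learning_rate=0.1,
                                       milestones=[30, 60, 80],
                                       epochs=100,
                                       step_each_epoch=100,
                                       gamma=0.1)
            # milestones are converted to steps internally: [3000, 6000, 8000]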
"""

    def __init__(self,
                 learning_rate,
                 milestones,
                 epochs,
                 step_each_epoch,
                 gamma=0.1,
                 last_epoch=-1,
                 verbose=False):
        if not isinstance(milestones, (tuple, list)):
            raise TypeError(
                "The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received %s."
                % type(milestones))
        if not all([
                milestones[i] < milestones[i + 1]
                for i in range(len(milestones) - 1)
        ]):
            raise ValueError(
                'The elements of milestones must be strictly increasing.')
        if gamma >= 1.0:
            raise ValueError('gamma should be < 1.0.')
        self.milestones = [x * step_each_epoch for x in milestones]
        self.gamma = gamma
        super().__init__(learning_rate, last_epoch, verbose)

    def get_lr(self):
        # the number of milestones already passed determines the decay exponent
        for i in range(len(self.milestones)):
            if self.last_epoch < self.milestones[i]:
                return self.base_lr * (self.gamma**i)
        return self.base_lr * (self.gamma**len(self.milestones))