from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
from __future__ import unicode_literals |
|
|
|
from paddle.optimizer import lr |
|
from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay, TwoStepCosineDecay |
|
|
|
|
|
class Linear(object): |
|
""" |
|
Linear learning rate decay |
|
Args: |
|
lr (float): The initial learning rate. It is a python float number. |
|
epochs(int): The decay step size. It determines the decay cycle. |
|
end_lr(float, optional): The minimum final learning rate. Default: 0.0001. |
|
power(float, optional): Power of polynomial. Default: 1.0. |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
epochs, |
|
step_each_epoch, |
|
end_lr=0.0, |
|
power=1.0, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(Linear, self).__init__() |
|
self.learning_rate = learning_rate |
|
self.epochs = epochs * step_each_epoch |
|
self.end_lr = end_lr |
|
self.power = power |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = lr.PolynomialDecay( |
|
learning_rate=self.learning_rate, |
|
decay_steps=self.epochs, |
|
end_lr=self.end_lr, |
|
power=self.power, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.learning_rate, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
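
# Example usage of these wrappers (a minimal sketch; the optimizer choice and the
# concrete numbers below are illustrative, not part of this module):
#
#   scheduler = Linear(learning_rate=0.001, epochs=100, step_each_epoch=500,
#                      warmup_epoch=5)()
#   optimizer = paddle.optimizer.Adam(learning_rate=scheduler,
#                                     parameters=model.parameters())
#   ...
#   scheduler.step()  # decay_steps/warmup_steps are counted in iterations,
#                     # so step() is expected to be called once per iteration.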
|
|
|
|
|
class Cosine(object): |
|
""" |
|
Cosine learning rate decay |
|
lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) |
|
Args: |
|
lr(float): initial learning rate |
|
step_each_epoch(int): steps each epoch |
|
epochs(int): total training epochs |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
step_each_epoch, |
|
epochs, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(Cosine, self).__init__() |
|
self.learning_rate = learning_rate |
|
self.T_max = step_each_epoch * epochs |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = lr.CosineAnnealingDecay( |
|
learning_rate=self.learning_rate, |
|
T_max=self.T_max, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.learning_rate, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
|
|
|
|
|
class Step(object): |
|
""" |
|
Piecewise learning rate decay |
|
Args: |
|
step_each_epoch(int): steps each epoch |
|
learning_rate (float): The initial learning rate. It is a python float number. |
|
step_size (int): the interval to update. |
|
gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . |
|
It should be less than 1.0. Default: 0.1. |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
step_size, |
|
step_each_epoch, |
|
gamma, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(Step, self).__init__() |
|
self.step_size = step_each_epoch * step_size |
|
self.learning_rate = learning_rate |
|
self.gamma = gamma |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = lr.StepDecay( |
|
learning_rate=self.learning_rate, |
|
step_size=self.step_size, |
|
gamma=self.gamma, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.learning_rate, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
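
# For Step, `step_size` is given in epochs and converted to iterations internally:
# e.g. (illustrative numbers) step_size=30 with step_each_epoch=100 builds an
# lr.StepDecay that reduces the learning rate every 3000 iterations.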
|
|
|
|
|
class Piecewise(object): |
|
""" |
|
Piecewise learning rate decay |
|
Args: |
|
boundaries(list): A list of steps numbers. The type of element in the list is python int. |
|
values(list): A list of learning rate values that will be picked during different epoch boundaries. |
|
The type of element in the list is python float. |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
step_each_epoch, |
|
decay_epochs, |
|
values, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(Piecewise, self).__init__() |
|
self.boundaries = [step_each_epoch * e for e in decay_epochs] |
|
self.values = values |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = lr.PiecewiseDecay( |
|
boundaries=self.boundaries, |
|
values=self.values, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.values[0], |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
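
# Piecewise expects one more value than there are decay epochs, matching the
# contract of paddle's lr.PiecewiseDecay (len(values) == len(boundaries) + 1).
# A minimal sketch with illustrative numbers:
#   sched = Piecewise(step_each_epoch=100, decay_epochs=[30, 60],
#                     values=[0.1, 0.01, 0.001])()
#   # boundaries become [3000, 6000] iterations.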
|
|
|
|
|
class CyclicalCosine(object): |
|
""" |
|
Cyclical cosine learning rate decay |
|
Args: |
|
learning_rate(float): initial learning rate |
|
step_each_epoch(int): steps each epoch |
|
epochs(int): total training epochs |
|
cycle(int): period of the cosine learning rate |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
step_each_epoch, |
|
epochs, |
|
cycle, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(CyclicalCosine, self).__init__() |
|
self.learning_rate = learning_rate |
|
self.T_max = step_each_epoch * epochs |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
self.cycle = round(cycle * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = CyclicalCosineDecay( |
|
learning_rate=self.learning_rate, |
|
T_max=self.T_max, |
|
cycle=self.cycle, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.learning_rate, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
|
|
|
|
|
class OneCycle(object): |
|
""" |
|
One Cycle learning rate decay |
|
Args: |
|
max_lr(float): Upper learning rate boundaries |
|
epochs(int): total training epochs |
|
step_each_epoch(int): steps each epoch |
|
anneal_strategy(str): {‘cos’, ‘linear’} Specifies the annealing strategy: “cos” for cosine annealing, “linear” for linear annealing. |
|
Default: ‘cos’ |
|
three_phase(bool): If True, use a third phase of the schedule to annihilate the learning rate according to ‘final_div_factor’ |
|
instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by ‘pct_start’). |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
max_lr, |
|
epochs, |
|
step_each_epoch, |
|
anneal_strategy='cos', |
|
three_phase=False, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(OneCycle, self).__init__() |
|
self.max_lr = max_lr |
|
self.epochs = epochs |
|
self.steps_per_epoch = step_each_epoch |
|
self.anneal_strategy = anneal_strategy |
|
self.three_phase = three_phase |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = OneCycleDecay( |
|
max_lr=self.max_lr, |
|
epochs=self.epochs, |
|
steps_per_epoch=self.steps_per_epoch, |
|
anneal_strategy=self.anneal_strategy, |
|
three_phase=self.three_phase, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.max_lr, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
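
# A minimal OneCycle sketch (illustrative values; the exact shape of the curve
# is determined by the local OneCycleDecay implementation in .lr_scheduler):
#   sched = OneCycle(max_lr=0.01, epochs=50, step_each_epoch=200,
#                    anneal_strategy='cos', three_phase=True)()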
|
|
|
|
|
class Const(object): |
|
""" |
|
Const learning rate decay |
|
Args: |
|
learning_rate(float): initial learning rate |
|
step_each_epoch(int): steps each epoch |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
step_each_epoch, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(Const, self).__init__() |
|
self.learning_rate = learning_rate |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = self.learning_rate |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.learning_rate, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
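
# Note: when warmup_epoch == 0, Const returns the plain float learning rate
# rather than a scheduler object; paddle optimizers accept either form.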
|
|
|
|
|
class DecayLearningRate(object): |
|
""" |
|
DecayLearningRate learning rate decay |
|
new_lr = (lr - end_lr) * (1 - epoch/decay_steps)**power + end_lr |
|
Args: |
|
learning_rate(float): initial learning rate |
|
step_each_epoch(int): steps each epoch |
|
epochs(int): total training epochs |
|
factor(float): Power of polynomial, should greater than 0.0 to get learning rate decay. Default: 0.9 |
|
end_lr(float): The minimum final learning rate. Default: 0.0. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
step_each_epoch, |
|
epochs, |
|
factor=0.9, |
|
end_lr=0, |
|
**kwargs): |
|
super(DecayLearningRate, self).__init__() |
|
self.learning_rate = learning_rate |
|
self.epochs = epochs + 1 |
|
self.factor = factor |
|
        self.end_lr = end_lr
|
self.decay_steps = step_each_epoch * epochs |
|
|
|
def __call__(self): |
|
learning_rate = lr.PolynomialDecay( |
|
learning_rate=self.learning_rate, |
|
decay_steps=self.decay_steps, |
|
power=self.factor, |
|
end_lr=self.end_lr) |
|
return learning_rate |
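
# Worked example of the polynomial decay above (illustrative numbers):
# with learning_rate=0.007, factor=0.9, end_lr=0 and decay_steps=N, the lr at
# step t is 0.007 * (1 - t/N) ** 0.9, i.e. about 0.00375 at the halfway point.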
|
|
|
|
|
class MultiStepDecay(object): |
|
""" |
|
Piecewise learning rate decay |
|
Args: |
|
step_each_epoch(int): steps each epoch |
|
learning_rate (float): The initial learning rate. It is a python float number. |
|
step_size (int): the interval to update. |
|
gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . |
|
It should be less than 1.0. Default: 0.1. |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
milestones, |
|
step_each_epoch, |
|
gamma, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(MultiStepDecay, self).__init__() |
|
self.milestones = [step_each_epoch * e for e in milestones] |
|
self.learning_rate = learning_rate |
|
self.gamma = gamma |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = lr.MultiStepDecay( |
|
learning_rate=self.learning_rate, |
|
milestones=self.milestones, |
|
gamma=self.gamma, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.learning_rate, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
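
# MultiStepDecay differs from Step in that the decay points are given explicitly:
# e.g. (illustrative) milestones=[6, 8] with step_each_epoch=1000 reduces the lr
# by `gamma` at iterations 6000 and 8000 rather than at a fixed interval.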
|
|
|
|
|
class TwoStepCosine(object): |
|
""" |
|
Cosine learning rate decay |
|
lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) |
|
Args: |
|
lr(float): initial learning rate |
|
step_each_epoch(int): steps each epoch |
|
epochs(int): total training epochs |
|
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. |
|
""" |
|
|
|
def __init__(self, |
|
learning_rate, |
|
step_each_epoch, |
|
epochs, |
|
warmup_epoch=0, |
|
last_epoch=-1, |
|
**kwargs): |
|
super(TwoStepCosine, self).__init__() |
|
self.learning_rate = learning_rate |
|
self.T_max1 = step_each_epoch * 200 |
|
self.T_max2 = step_each_epoch * epochs |
|
self.last_epoch = last_epoch |
|
self.warmup_epoch = round(warmup_epoch * step_each_epoch) |
|
|
|
def __call__(self): |
|
learning_rate = TwoStepCosineDecay( |
|
learning_rate=self.learning_rate, |
|
T_max1=self.T_max1, |
|
T_max2=self.T_max2, |
|
last_epoch=self.last_epoch) |
|
if self.warmup_epoch > 0: |
|
learning_rate = lr.LinearWarmup( |
|
learning_rate=learning_rate, |
|
warmup_steps=self.warmup_epoch, |
|
start_lr=0.0, |
|
end_lr=self.learning_rate, |
|
last_epoch=self.last_epoch) |
|
return learning_rate |
|
|