Update a few comments, add some references
parent 0562b91c38
commit db1fe34d0c
@@ -20,6 +20,9 @@ class AdaBound(Optimizer):
         amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
     .. _Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
         https://openreview.net/forum?id=Bkg3g2R9FX
+
+    Originally taken from https://github.com/Luolc/AdaBound
+    NOTE: Has not provided good (or even decent) results on large datasets like ImageNet
     """

     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
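For context on the AMSBound/AdaBound docstring above: AdaBound clips Adam's per-parameter step size between bounds that tighten toward final_lr over time, so training starts Adam-like and ends SGD-like. Below is a minimal sketch of that clipping, following the formulas in the Luolc/AdaBound reference implementation; bias correction and the rescaling of final_lr by lr/base_lr are omitted, and the function name is illustrative, not this repo's API.

    import math

    def adabound_step_size(lr, final_lr, gamma, step, v_hat, eps=1e-8):
        # Adam-style step size before clipping (v_hat: second-moment estimate)
        step_size = lr / (math.sqrt(v_hat) + eps)
        # Bounds start wide and converge to final_lr as step grows, so the
        # update gradually transitions from adaptive (Adam) to fixed (SGD)
        lower = final_lr * (1 - 1 / (gamma * step + 1))
        upper = final_lr * (1 + 1 / (gamma * step))
        return min(max(step_size, lower), upper)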
@@ -20,6 +20,9 @@ class Nadam(Optimizer):

     __ http://cs229.stanford.edu/proj2015/054_report.pdf
     __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
+
+    Originally taken from: https://github.com/pytorch/pytorch/pull/1408
+    NOTE: Has potential issues but does work well on some problems.
     """

     def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8,
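Nadam is Adam with Nesterov-style momentum, per the Dozat report linked above. A toy single-parameter sketch of the core update, ignoring the momentum-decay schedule (the product of mu_t terms) that full implementations such as the linked PR use:

    import math

    def nadam_step(p, g, m, v, t, lr=2e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        m = beta1 * m + (1 - beta1) * g        # first moment (momentum)
        v = beta2 * v + (1 - beta2) * g * g    # second moment
        m_hat = m / (1 - beta1 ** t)           # bias-corrected moments
        v_hat = v / (1 - beta2 ** t)
        # Nesterov look-ahead: blend the corrected momentum with the
        # corrected current gradient before taking the Adam-style step
        m_nesterov = beta1 * m_hat + (1 - beta1) * g / (1 - beta1 ** t)
        p = p - lr * m_nesterov / (math.sqrt(v_hat) + eps)
        return p, m, v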
@@ -11,8 +11,11 @@ logger = logging.getLogger(__name__)

 class CosineLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
+    Cosine decay with restarts.
     This is described in the paper https://arxiv.org/abs/1608.03983.
+
+    Inspiration from
+    https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py
     """

     def __init__(self,
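The SGDR schedule referenced above (https://arxiv.org/abs/1608.03983) decays the LR along a cosine within each cycle and jumps back to the base LR at each restart, with cycle lengths typically growing by a multiplier. A self-contained sketch; the t_initial/t_mul/lr_min names echo common scheduler arguments but are illustrative here, not this class's exact API:

    import math

    def cosine_restart_lr(t, base_lr, lr_min=0.0, t_initial=10, t_mul=2):
        # Locate the current restart cycle: subtract completed cycle
        # lengths from t, growing each cycle by t_mul
        t_i, t_cur = t_initial, t
        while t_cur >= t_i:
            t_cur -= t_i
            t_i *= t_mul
        # Cosine decay from base_lr down to lr_min within the cycle
        return lr_min + 0.5 * (base_lr - lr_min) * (1 + math.cos(math.pi * t_cur / t_i))

For example, with t_initial=10 and t_mul=2 the LR restarts at its base value at epochs 10 and 30, each cycle twice as long as the previous one.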
@@ -11,8 +11,8 @@ logger = logging.getLogger(__name__)

 class TanhLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
-    This is described in the paper https://arxiv.org/abs/1608.03983.
+    Hyperbolic-Tangent decay with restarts.
+    This is described in the paper https://arxiv.org/abs/1806.01593
     """

     def __init__(self,
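The hyperbolic-tangent schedule from https://arxiv.org/abs/1806.01593 replaces the cosine shape with a tanh curve that stays near the base LR early and falls off steeply later. A sketch of a single (restart-free) cycle; the bounds lb=-6 and ub=4 are illustrative choices, not values confirmed from this repo:

    import math

    def tanh_decay_lr(t, base_lr, total_steps, lb=-6.0, ub=4.0, lr_min=0.0):
        # tanh sweeps from ~-1 (early: lr near base_lr) to ~+1 (late:
        # lr near lr_min) as t runs from 0 to total_steps
        frac = 0.5 * (1 - math.tanh(lb + (ub - lb) * t / total_steps))
        return lr_min + (base_lr - lr_min) * frac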
utils.py
@@ -1,7 +1,5 @@
 import torch
-import numbers
 import math
-import numpy as np
 import os
 import shutil
 import glob
@@ -68,7 +68,6 @@ def main():
     else:
         model = model.cuda()
-
     # define loss function (criterion) and optimizer
     criterion = nn.CrossEntropyLoss().cuda()

     loader = create_loader(
@@ -87,7 +86,6 @@ def main():
     top1 = AverageMeter()
     top5 = AverageMeter()
-
     # switch to evaluate mode
     model.eval()
     end = time.time()
     with torch.no_grad():
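The hunk above resets top-1/top-5 meters and puts the model in eval mode inside torch.no_grad(), so no autograd state is kept during validation. A minimal sketch of the kind of running-average meter being used; not necessarily identical to this repo's AverageMeter:

    class AverageMeter:
        def __init__(self):
            self.sum = 0.0   # weighted sum of observed values
            self.count = 0   # total number of samples seen
            self.avg = 0.0

        def update(self, val, n=1):
            # val is a per-batch average; weight it by the batch size n
            # so the running average is exact over all samples
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

Typical per-batch use would look like top1.update(acc1, input.size(0)), with acc1 the batch's top-1 accuracy.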