Update a few comments, add some references
parent 0562b91c38
commit db1fe34d0c
@@ -20,6 +20,9 @@ class AdaBound(Optimizer):
         amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
     .. _Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
         https://openreview.net/forum?id=Bkg3g2R9FX
+
+    Originally taken from https://github.com/Luolc/AdaBound
+    NOTE: Has not provided good (or even decent) results on large datasets like ImageNet
     """

     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
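For context on the AMSBound/AdaBound docstring above: AdaBound clips Adam's per-parameter step size between bounds that tighten toward final_lr over time, so training starts Adam-like and ends SGD-like. Below is a minimal sketch of that clipping, following the formulas in the Luolc/AdaBound reference implementation; bias correction and the rescaling of final_lr by lr/base_lr are omitted, and the function name is illustrative, not this repo's API.

    import math

    def adabound_step_size(lr, final_lr, gamma, step, v_hat, eps=1e-8):
        # Adam-style step size before clipping (v_hat: second-moment estimate)
        step_size = lr / (math.sqrt(v_hat) + eps)
        # Bounds start wide and converge to final_lr as step grows, so the
        # update gradually transitions from adaptive (Adam) to fixed (SGD)
        lower = final_lr * (1 - 1 / (gamma * step + 1))
        upper = final_lr * (1 + 1 / (gamma * step))
        return min(max(step_size, lower), upper)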
@@ -20,6 +20,9 @@ class Nadam(Optimizer):

     __ http://cs229.stanford.edu/proj2015/054_report.pdf
     __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
+
+    Originally taken from: https://github.com/pytorch/pytorch/pull/1408
+    NOTE: Has potential issues but does work well on some problems.
     """

     def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8,
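Nadam is Adam with Nesterov-style momentum, per the Dozat report linked above. A toy single-parameter sketch of the core update, ignoring the momentum-decay schedule (the product of mu_t terms) that full implementations such as the linked PR use:

    import math

    def nadam_step(p, g, m, v, t, lr=2e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        m = beta1 * m + (1 - beta1) * g        # first moment (momentum)
        v = beta2 * v + (1 - beta2) * g * g    # second moment
        m_hat = m / (1 - beta1 ** t)           # bias-corrected moments
        v_hat = v / (1 - beta2 ** t)
        # Nesterov look-ahead: blend the corrected momentum with the
        # corrected current gradient before taking the Adam-style step
        m_nesterov = beta1 * m_hat + (1 - beta1) * g / (1 - beta1 ** t)
        p = p - lr * m_nesterov / (math.sqrt(v_hat) + eps)
        return p, m, v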
@@ -11,8 +11,11 @@ logger = logging.getLogger(__name__)

 class CosineLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
+    Cosine decay with restarts.
     This is described in the paper https://arxiv.org/abs/1608.03983.
+
+    Inspiration from
+    https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py
     """

     def __init__(self,
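The SGDR schedule referenced above (https://arxiv.org/abs/1608.03983) decays the LR along a cosine within each cycle and jumps back to the base LR at each restart, with cycle lengths typically growing by a multiplier. A self-contained sketch; the t_initial/t_mul/lr_min names echo common scheduler arguments but are illustrative here, not this class's exact API:

    import math

    def cosine_restart_lr(t, base_lr, lr_min=0.0, t_initial=10, t_mul=2):
        # Locate the current restart cycle: subtract completed cycle
        # lengths from t, growing each cycle by t_mul
        t_i, t_cur = t_initial, t
        while t_cur >= t_i:
            t_cur -= t_i
            t_i *= t_mul
        # Cosine decay from base_lr down to lr_min within the cycle
        return lr_min + 0.5 * (base_lr - lr_min) * (1 + math.cos(math.pi * t_cur / t_i))

For example, with t_initial=10 and t_mul=2 the LR restarts at its base value at epochs 10 and 30, each cycle twice as long as the previous one.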
@@ -11,8 +11,8 @@ logger = logging.getLogger(__name__)

 class TanhLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
-    This is described in the paper https://arxiv.org/abs/1608.03983.
+    Hyperbolic-Tangent decay with restarts.
+    This is described in the paper https://arxiv.org/abs/1806.01593
     """

     def __init__(self,
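The hyperbolic-tangent schedule from https://arxiv.org/abs/1806.01593 replaces the cosine shape with a tanh curve that stays near the base LR early and falls off steeply later. A sketch of a single (restart-free) cycle; the bounds lb=-6 and ub=4 are illustrative choices, not values confirmed from this repo:

    import math

    def tanh_decay_lr(t, base_lr, total_steps, lb=-6.0, ub=4.0, lr_min=0.0):
        # tanh sweeps from ~-1 (early: lr near base_lr) to ~+1 (late:
        # lr near lr_min) as t runs from 0 to total_steps
        frac = 0.5 * (1 - math.tanh(lb + (ub - lb) * t / total_steps))
        return lr_min + (base_lr - lr_min) * frac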
utils.py
@@ -1,7 +1,5 @@
 import torch
-import numbers
 import math
-import numpy as np
 import os
 import shutil
 import glob
@@ -68,7 +68,6 @@ def main():
     else:
         model = model.cuda()
-
     # define loss function (criterion) and optimizer
     criterion = nn.CrossEntropyLoss().cuda()

     loader = create_loader(
@@ -87,7 +86,6 @@ def main():
     top1 = AverageMeter()
     top5 = AverageMeter()
-
     # switch to evaluate mode
     model.eval()
     end = time.time()
     with torch.no_grad():
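The hunk above resets top-1/top-5 meters and puts the model in eval mode inside torch.no_grad(), so no autograd state is kept during validation. A minimal sketch of the kind of running-average meter being used; not necessarily identical to this repo's AverageMeter:

    class AverageMeter:
        def __init__(self):
            self.sum = 0.0   # weighted sum of observed values
            self.count = 0   # total number of samples seen
            self.avg = 0.0

        def update(self, val, n=1):
            # val is a per-batch average; weight it by the batch size n
            # so the running average is exact over all samples
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

Typical per-batch use would look like top1.update(acc1, input.size(0)), with acc1 the batch's top-1 accuracy.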