From 7204c1ca25fa69a911802edab719b4cc323103f4 Mon Sep 17 00:00:00 2001
From: Yonghye Kwon
Date: Sat, 16 Jul 2022 22:51:48 +0900
Subject: [PATCH] Explicitly set `weight_decay` value (#8592)

* explicitly set weight_decay value

The default weight_decay value of AdamW is 1e-2, so we should set it to zero.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Cleanup

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher
---
 train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index bf5b4c69d..ff13f1e25 100644
--- a/train.py
+++ b/train.py
@@ -163,12 +163,12 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     if opt.optimizer == 'Adam':
         optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
     elif opt.optimizer == 'AdamW':
-        optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+        optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999), weight_decay=0.0)
     else:
         optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

     optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']})  # add g0 with weight_decay
-    optimizer.add_param_group({'params': g[1]})  # add g1 (BatchNorm2d weights)
+    optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})  # add g1 (BatchNorm2d weights)
     LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
                 f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
     del g
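
For context: PyTorch's AdamW constructor defaults weight_decay to 1e-2 (unlike Adam and SGD, which default to 0.0), and add_param_group() copies any option not specified in the new group from the optimizer's defaults. The sketch below is a minimal illustration of both effects, using a hypothetical two-layer model in place of the YOLOv5 parameter groups g[0]/g[1]/g[2]; it is not the train.py code itself.

    # Minimal sketch (assumes PyTorch is installed); `model` is a stand-in module.
    import torch.nn as nn
    from torch.optim import AdamW

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))

    # AdamW defaults to weight_decay=1e-2, so zero it explicitly when decay
    # is meant to be controlled per parameter group.
    optimizer = AdamW(model[0].parameters(), lr=0.01, weight_decay=0.0)

    # add_param_group() fills unspecified options from the optimizer defaults,
    # so BatchNorm weights are also pinned to weight_decay=0.0 explicitly.
    optimizer.add_param_group({'params': model[1].parameters(), 'weight_decay': 0.0})

    for group in optimizer.param_groups:
        print(group['weight_decay'])  # 0.0 for both groups

Without the two explicit weight_decay=0.0 settings, both the bias group passed to the constructor and the BatchNorm group added afterwards would silently inherit AdamW's 1e-2 decay, which is the behavior this patch corrects.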