Make LaProp weight decay match typical PyTorch 'decoupled' behaviour where it's scaled by LR

Ross Wightman 2024-11-29 16:44:43 -08:00
parent 886eb77938
commit 82e8677690


@@ -116,6 +116,6 @@ class LaProp(Optimizer):
                 p.add_(exp_avg, alpha=-step_size)
                 if group['weight_decay'] != 0:
-                    p.add_(p, alpha=-group['weight_decay'])
+                    p.add_(p, alpha=-(group['lr'] * group['weight_decay']))
         return loss
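
A minimal sketch (not part of the commit) comparing the two decay rules side by side. The tensor `p` and the `group` dict below are stand-ins for an optimizer parameter and its param group; `p_old` and `p_new` are hypothetical names used only for illustration.

import torch

p = torch.ones(4)
group = {'lr': 1e-3, 'weight_decay': 1e-2}

# Pre-commit behaviour: p <- p - wd * p, i.e. p *= (1 - wd).
# The decay strength ignores the learning rate entirely.
p_old = p.clone()
p_old.add_(p_old, alpha=-group['weight_decay'])

# Post-commit behaviour: p <- p - lr * wd * p, i.e. p *= (1 - lr * wd),
# matching the AdamW-style 'decoupled' weight decay typical in PyTorch.
p_new = p.clone()
p_new.add_(p_new, alpha=-(group['lr'] * group['weight_decay']))

print(p_old[0].item())  # ~0.99    (strong decay even at a tiny lr)
print(p_new[0].item())  # ~0.99999 (decay strength tracks the lr)

The practical upshot is that after this change a `weight_decay` value tuned for AdamW behaves comparably in LaProp, instead of decaying parameters orders of magnitude faster at small learning rates.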