From 61305cc26abd99b3e6fec9226f0bbb9db79f75ff Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Tue, 12 Nov 2024 17:55:57 -0800 Subject: [PATCH] Fix adopt descriptions --- timm/optim/_optim_factory.py | 6 +++--- timm/optim/adopt.py | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/timm/optim/_optim_factory.py b/timm/optim/_optim_factory.py index b3759a37..97cbfd22 100644 --- a/timm/optim/_optim_factory.py +++ b/timm/optim/_optim_factory.py @@ -432,17 +432,17 @@ def _register_adam_variants(registry: OptimizerRegistry) -> None: OptimInfo( name='adafactorbv', opt_class=AdafactorBigVision, - description='Big Vision variant of Adafactor with factored gradients, half precision momentum.', + description='Big Vision variant of Adafactor with factored gradients, half precision momentum', ), OptimInfo( name='adopt', opt_class=Adopt, - description='Memory-efficient implementation of Adam with factored gradients', + description='Modified Adam that can converge with any β2 with the optimal rate', ), OptimInfo( name='adoptw', opt_class=Adopt, - description='Memory-efficient implementation of Adam with factored gradients', + description='Modified AdamW (decoupled decay) that can converge with any β2 with the optimal rate', defaults={'decoupled': True} ), ] diff --git a/timm/optim/adopt.py b/timm/optim/adopt.py index 648d9b6a..486cb626 100644 --- a/timm/optim/adopt.py +++ b/timm/optim/adopt.py @@ -51,6 +51,10 @@ def _get_value(x): class Adopt(Optimizer): + """ + ADOPT: Modified Adam Can Converge with Any β2 with the Optimal Rate: https://arxiv.org/abs/2411.02853 + + """ def __init__( self, params,