Fix adopt descriptions

small_384_weights
Ross Wightman 2024-11-12 17:55:57 -08:00 committed by Ross Wightman
parent ce42cc4846
commit 61305cc26a
2 changed files with 7 additions and 3 deletions

View File

@@ -432,17 +432,17 @@ def _register_adam_variants(registry: OptimizerRegistry) -> None:
OptimInfo(
name='adafactorbv',
opt_class=AdafactorBigVision,
description='Big Vision variant of Adafactor with factored gradients, half precision momentum.',
description='Big Vision variant of Adafactor with factored gradients, half precision momentum',
),
OptimInfo(
name='adopt',
opt_class=Adopt,
description='Memory-efficient implementation of Adam with factored gradients',
description='Modified Adam that can converge with any β2 with the optimal rate',
),
OptimInfo(
name='adoptw',
opt_class=Adopt,
description='Memory-efficient implementation of Adam with factored gradients',
description='Modified AdamW (decoupled decay) that can converge with any β2 with the optimal rate',
defaults={'decoupled': True}
),
]

View File

@@ -51,6 +51,10 @@ def _get_value(x):
class Adopt(Optimizer):
"""
ADOPT: Modified Adam Can Converge with Any β2 with the Optimal Rate: https://arxiv.org/abs/2411.02853
"""
def __init__(
self,
params,