diff --git a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py
index dc8bc2fe4..2b6b8ff9c 100644
--- a/mmseg/core/optimizers/layer_decay_optimizer_constructor.py
+++ b/mmseg/core/optimizers/layer_decay_optimizer_constructor.py
@@ -17,7 +17,7 @@ def get_layer_id_for_convnext(var_name, max_layer_id):
         max_layer_id (int): Maximum number of backbone layers.
 
     Returns:
-        int: The id number corresponding to different learning rate in 
+        int: The id number corresponding to different learning rate in
         ``LearningRateDecayOptimizerConstructor``.
     """
 
@@ -60,7 +60,7 @@ def get_stage_id_for_convnext(var_name, max_stage_id):
         max_stage_id (int): Maximum number of backbone layers.
 
     Returns:
-        int: The id number corresponding to different learning rate in 
+        int: The id number corresponding to different learning rate in
         ``LearningRateDecayOptimizerConstructor``.
     """
 
@@ -103,8 +103,8 @@ def get_layer_id_for_vit(var_name, max_layer_id):
 class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor):
     """Different learning rates are set for different layers of backbone.
 
-    Note: Currently, this optimizer constructor is built for ConvNeXt
-    and BEiT.
+    Note: Currently, this optimizer constructor is built for ConvNeXt,
+    BEiT and MAE.
     """
 
     def add_params(self, params, module, **kwargs):
diff --git a/tests/test_core/test_layer_decay_optimizer_constructor.py b/tests/test_core/test_layer_decay_optimizer_constructor.py
index 268a9a148..4911f3b3a 100644
--- a/tests/test_core/test_layer_decay_optimizer_constructor.py
+++ b/tests/test_core/test_layer_decay_optimizer_constructor.py
@@ -157,6 +157,19 @@ class ToyBEiT(nn.Module):
             self.layers.append(layer)
 
 
+class ToyMAE(nn.Module):
+
+    def __init__(self):
+        super().__init__()
+        # add some variables to meet unit test coverage rate
+        self.cls_token = nn.Parameter(torch.ones(1))
+        self.patch_embed = nn.Parameter(torch.ones(1))
+        self.layers = nn.ModuleList()
+        for _ in range(3):
+            layer = nn.Conv2d(3, 3, 1)
+            self.layers.append(layer)
+
+
 class ToySegmentor(nn.Module):
 
     def __init__(self, backbone):
@@ -236,6 +249,17 @@ def test_learning_rate_decay_optimizer_constructor():
         optimizer_cfg, stagewise_paramwise_cfg)
     optimizer = optim_constructor(model)
 
+    # Test lr wd for MAE
+    backbone = ToyMAE()
+    model = PseudoDataParallel(ToySegmentor(backbone))
+
+    layerwise_paramwise_cfg = dict(
+        decay_rate=decay_rate, decay_type='layer_wise', num_layers=3)
+    optim_constructor = LearningRateDecayOptimizerConstructor(
+        optimizer_cfg, layerwise_paramwise_cfg)
+    optimizer = optim_constructor(model)
+    check_optimizer_lr_wd(optimizer, expected_layer_wise_wd_lr_beit)
+
 
 def test_beit_layer_decay_optimizer_constructor():
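
Reviewer note (not part of the patch): a minimal sketch of how the updated constructor could be selected for an MAE backbone in an mmsegmentation optimizer config. The paramwise_cfg keys (decay_rate, decay_type, num_layers) mirror the new unit test above; the AdamW hyper-parameters and the values 0.65 / 12 are illustrative assumptions, not taken from this diff.

    # Hypothetical config snippet; hyper-parameter values are assumptions.
    optimizer = dict(
        type='AdamW',
        lr=1e-4,
        betas=(0.9, 0.999),
        weight_decay=0.05,
        constructor='LearningRateDecayOptimizerConstructor',
        paramwise_cfg=dict(
            decay_rate=0.65, decay_type='layer_wise', num_layers=12))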