[fix] EMAHook load state dict (#507)

* fix ema load_state_dict
* fix ema load_state_dict
* fix for test
* fix by review
* fix resume and keys
2022-09-09 12:41:12 +09:00 · 2022-09-09 12:41:12 +09:00 · a6f5297727
parent cfb884c180
commit a6f5297727
2 changed files with 38 additions and 6 deletions
--- a/mmengine/hooks/ema_hook.py
+++ b/mmengine/hooks/ema_hook.py
@ -7,6 +7,7 @@ from typing import Dict, Optional
 from mmengine.logging import print_log
 from mmengine.model import is_model_wrapper
 from mmengine.registry import HOOKS, MODELS
+from mmengine.runner.checkpoint import _load_checkpoint_to_model
 from .hook import DATA_BATCH, Hook


@ -171,7 +172,7 @@ class EMAHook(Hook):
        Args:
            runner (Runner): The runner of the testing process.
        """
-        if 'ema_state_dict' in checkpoint:
+        if 'ema_state_dict' in checkpoint and runner._resume:
            # The original model parameters are actually saved in the ema
            # field; swap the weights back to resume the ema state.
            self._swap_ema_state_dict(checkpoint)
@ -180,11 +181,13 @@ class EMAHook(Hook):

        # Support load checkpoint without ema state dict.
        else:
-            print_log(
-                'There is no `ema_state_dict` in checkpoint. '
-                '`EMAHook` will make a copy of `state_dict` as the '
-                'initial `ema_state_dict`', 'current', logging.WARNING)
-            self.ema_model.module.load_state_dict(
+            if runner._resume:
+                print_log(
+                    'There is no `ema_state_dict` in checkpoint. '
+                    '`EMAHook` will make a copy of `state_dict` as the '
+                    'initial `ema_state_dict`', 'current', logging.WARNING)
+            _load_checkpoint_to_model(
+                self.ema_model.module,
                copy.deepcopy(checkpoint['state_dict']),
                strict=self.strict_load)

--- a/tests/test_hooks/test_ema_hook.py
+++ b/tests/test_hooks/test_ema_hook.py
@ -56,6 +56,16 @@ class ToyModel2(BaseModel, ToyModel):
        return super(BaseModel, self).forward(*args, **kwargs)


+class ToyModel3(BaseModel, ToyModel):
+
+    def __init__(self):
+        super().__init__()
+        self.linear1 = nn.Linear(2, 2)
+
+    def forward(self, *args, **kwargs):
+        return super(BaseModel, self).forward(*args, **kwargs)
+
+
@DATASETS.register_module()
 class DummyDataset(Dataset):
    METAINFO = dict()  # type: ignore
@ -203,6 +213,25 @@ class TestEMAHook(TestCase):
            experiment_name='test5')
        runner.test()

+        # Test loading the checkpoint non-strictly (strict_load=False).
+        # Test load checkpoint without ema_state_dict
+        # Test with different size head.
+        runner = Runner(
+            model=ToyModel3(),
+            test_dataloader=dict(
+                dataset=dict(type='DummyDataset'),
+                sampler=dict(type='DefaultSampler', shuffle=True),
+                batch_size=3,
+                num_workers=0),
+            test_evaluator=evaluator,
+            test_cfg=dict(),
+            work_dir=self.temp_dir.name,
+            load_from=osp.join(self.temp_dir.name, 'epoch_2.pth'),
+            default_hooks=dict(logger=None),
+            custom_hooks=[dict(type='EMAHook', strict_load=False)],
+            experiment_name='test5')
+        runner.test()
+
        # Test enable ema at 5 epochs.
        runner = Runner(
            model=model,