From 66604e83dee6ba1bd2cfdfe2916b76bee9ea6e70 Mon Sep 17 00:00:00 2001
From: Wang Xinjiang
Date: Fri, 24 Jul 2020 14:15:44 +0800
Subject: [PATCH] Add syncbuffer hook (#443)

* reformat

* reformat

* Add register hook from cfg

* docstring

* change according to comments
---
 mmcv/runner/base_runner.py       | 16 ++++++++++++++
 mmcv/runner/hooks/sync_buffer.py | 26 +++++++++++++++++++++++
 tests/test_runner/test_hooks.py  | 36 ++++++++++++++++++--------------
 3 files changed, 62 insertions(+), 16 deletions(-)
 create mode 100644 mmcv/runner/hooks/sync_buffer.py

diff --git a/mmcv/runner/base_runner.py b/mmcv/runner/base_runner.py
index c25215850..80db664eb 100644
--- a/mmcv/runner/base_runner.py
+++ b/mmcv/runner/base_runner.py
@@ -271,6 +271,22 @@ class BaseRunner(metaclass=ABCMeta):
         if not inserted:
             self._hooks.insert(0, hook)
 
+    def register_hook_from_cfg(self, hook_cfg):
+        """Register a hook from its cfg.
+
+        Args:
+            hook_cfg (dict): Hook config. It should have at least key 'type'
+                indicating the hook type; 'priority' defaults to 'NORMAL'.
+
+        Note:
+            The specific hook class to register should not use 'type' and
+            'priority' arguments during initialization.
+        """
+        hook_cfg = hook_cfg.copy()
+        priority = hook_cfg.pop('priority', 'NORMAL')
+        hook = mmcv.build_from_cfg(hook_cfg, HOOKS)
+        self.register_hook(hook, priority=priority)
+
     def call_hook(self, fn_name):
         """Call all hooks.
 
diff --git a/mmcv/runner/hooks/sync_buffer.py b/mmcv/runner/hooks/sync_buffer.py
new file mode 100644
index 000000000..70c06443f
--- /dev/null
+++ b/mmcv/runner/hooks/sync_buffer.py
@@ -0,0 +1,26 @@
+# Copyright (c) Open-MMLab. All rights reserved.
+import torch.distributed as dist
+
+from .hook import HOOKS, Hook
+
+
+@HOOKS.register_module()
+class SyncBuffersHook(Hook):
+    """Synchronize model buffers such as running_mean and running_var in BN at
+    the end of each epoch.
+
+    Args:
+        distributed (bool): Whether distributed training is used. It is
+            effective only for distributed training. Defaults to True.
+    """
+
+    def __init__(self, distributed=True):
+        self.distributed = distributed
+
+    def after_epoch(self, runner):
+        """All-reduce model buffers at the end of each epoch."""
+        if self.distributed:
+            buffers = runner.model.buffers()
+            world_size = dist.get_world_size()
+            for tensor in buffers:
+                dist.all_reduce(tensor.div_(world_size))
diff --git a/tests/test_runner/test_hooks.py b/tests/test_runner/test_hooks.py
index a8f4c0734..1f5ca20c7 100644
--- a/tests/test_runner/test_hooks.py
+++ b/tests/test_runner/test_hooks.py
@@ -18,11 +18,7 @@ from torch.utils.data import DataLoader
 
 from mmcv.runner import (EpochBasedRunner, IterTimerHook, MlflowLoggerHook,
                          PaviLoggerHook, WandbLoggerHook)
-from mmcv.runner.hooks.lr_updater import (CosineAnnealingLrUpdaterHook,
-                                          CosineRestartLrUpdaterHook,
-                                          CyclicLrUpdaterHook)
-from mmcv.runner.hooks.momentum_updater import (
-    CosineAnnealingMomentumUpdaterHook, CyclicMomentumUpdaterHook)
+from mmcv.runner.hooks.lr_updater import CosineRestartLrUpdaterHook
 
 
 def test_pavi_hook():
@@ -53,21 +49,23 @@ def test_momentum_runner_hook():
     runner = _build_demo_runner()
 
     # add momentum scheduler
-    hook = CyclicMomentumUpdaterHook(
+    hook_cfg = dict(
+        type='CyclicMomentumUpdaterHook',
         by_epoch=False,
         target_ratio=(0.85 / 0.95, 1),
         cyclic_times=1,
         step_ratio_up=0.4)
-    runner.register_hook(hook)
+    runner.register_hook_from_cfg(hook_cfg)
 
     # add momentum LR scheduler
-    hook = CyclicLrUpdaterHook(
+    hook_cfg = dict(
+        type='CyclicLrUpdaterHook',
         by_epoch=False,
         target_ratio=(10, 1),
         cyclic_times=1,
         step_ratio_up=0.4)
-    runner.register_hook(hook)
-    runner.register_hook(IterTimerHook())
+    runner.register_hook_from_cfg(hook_cfg)
+    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
 
     # add pavi hook
     hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
     runner.register_hook(hook)
@@ -101,19 +99,25 @@ def test_cosine_runner_hook():
     runner = _build_demo_runner()
 
     # add momentum scheduler
-    hook = CosineAnnealingMomentumUpdaterHook(
+
+    hook_cfg = dict(
+        type='CosineAnnealingMomentumUpdaterHook',
         min_momentum_ratio=0.99 / 0.95,
         by_epoch=False,
         warmup_iters=2,
         warmup_ratio=0.9 / 0.95)
-    runner.register_hook(hook)
+    runner.register_hook_from_cfg(hook_cfg)
 
     # add momentum LR scheduler
-    hook = CosineAnnealingLrUpdaterHook(
-        by_epoch=False, min_lr_ratio=0, warmup_iters=2, warmup_ratio=0.9)
-    runner.register_hook(hook)
+    hook_cfg = dict(
+        type='CosineAnnealingLrUpdaterHook',
+        by_epoch=False,
+        min_lr_ratio=0,
+        warmup_iters=2,
+        warmup_ratio=0.9)
+    runner.register_hook_from_cfg(hook_cfg)
+    runner.register_hook_from_cfg(dict(type='IterTimerHook'))
     runner.register_hook(IterTimerHook())
 
-    # add pavi hook
     hook = PaviLoggerHook(interval=1, add_graph=False, add_last_ckpt=True)
     runner.register_hook(hook)