[Feature] NaiveVisualizationHook (#98)

* [WIP] testvisualizationhook
* add TestNaiveVisualizationHook
* fix comment
* unpad
* batch imdenormalize
* fix comment
* fix comment
2025-06-03 21:54:44 +08:00 · 2022-03-10 17:22:31 +08:00 · 2022-03-10 17:22:31 +08:00 · 3e0c064f49
commit 3e0c064f49
parent 02ceaedb82
4 changed files with 202 additions and 1 deletion
--- a/mmengine/hooks/__init__.py
+++ b/mmengine/hooks/__init__.py
@@ -4,6 +4,7 @@ from .empty_cache_hook import EmptyCacheHook
 from .hook import Hook
 from .iter_timer_hook import IterTimerHook
 from .logger_hook import LoggerHook
+from .naive_visualization_hook import NaiveVisualizationHook
 from .optimizer_hook import OptimizerHook
 from .param_scheduler_hook import ParamSchedulerHook
 from .sampler_seed_hook import DistSamplerSeedHook
@@ -12,5 +13,5 @@ from .sync_buffer_hook import SyncBuffersHook
 __all__ = [
    'Hook', 'IterTimerHook', 'DistSamplerSeedHook', 'ParamSchedulerHook',
    'OptimizerHook', 'SyncBuffersHook', 'EmptyCacheHook', 'CheckpointHook',
-    'LoggerHook'
+    'LoggerHook', 'NaiveVisualizationHook'
 ]
--- a/mmengine/hooks/naive_visualization_hook.py
+++ b/mmengine/hooks/naive_visualization_hook.py
@@ -0,0 +1,71 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+from typing import Any, Optional, Sequence, Tuple
+
+import cv2
+import numpy as np
+
+from mmengine.data import BaseDataSample
+from mmengine.hooks import Hook
+from mmengine.registry import HOOKS
+from mmengine.utils.misc import tensor2imgs
+
+
+@HOOKS.register_module()
+class NaiveVisualizationHook(Hook):
+    """Show or write the predicted results during the process of testing.
+
+    Args:
+        interval (int): Visualization interval. Defaults to 1.
+        draw_gt (bool): Whether to draw the ground truth. Defaults to True.
+        draw_pred (bool): Whether to draw the predicted result.
+            Defaults to True.
+    """
+    priority = 'NORMAL'
+
+    def __init__(self,
+                 interval: int = 1,
+                 draw_gt: bool = True,
+                 draw_pred: bool = True):
+        self.draw_gt = draw_gt
+        self.draw_pred = draw_pred
+        self._interval = interval
+
+    def _unpad(self, input: np.ndarray, unpad_shape: Tuple[int,
+                                                           int]) -> np.ndarray:
+        unpad_width, unpad_height = unpad_shape
+        unpad_image = input[:unpad_height, :unpad_width]
+        return unpad_image
+
+    def after_test_iter(
+            self,
+            runner,
+            data_batch: Optional[Sequence[Tuple[Any, BaseDataSample]]] = None,
+            outputs: Optional[Sequence[BaseDataSample]] = None) -> None:
+        """Show or Write the predicted results.
+
+        Args:
+            runner (Runner): The runner of the testing process.
+            data_batch (Sequence[Tuple[Any, BaseDataSample]], optional): Data
+                from dataloader. Defaults to None.
+            outputs (Sequence[BaseDataSample], optional): Outputs from model.
+                Defaults to None.
+        """
+        if self.every_n_iters(runner, self._interval):
+            inputs, data_samples = data_batch  # type: ignore
+            inputs = tensor2imgs(inputs,
+                                 **data_samples[0].get('img_norm_cfg', dict()))
+            for input, data_sample, output in zip(
+                    inputs,
+                    data_samples,  # type: ignore
+                    outputs):  # type: ignore
+                # TODO We will implement a function to revert the augmentation
+                # in the future.
+                ori_shape = (data_sample.ori_width, data_sample.ori_height)
+                if 'pad_shape' in data_sample:
+                    input = self._unpad(input,
+                                        data_sample.get('scale', ori_shape))
+                origin_image = cv2.resize(input, ori_shape)
+                name = osp.basename(data_sample.img_path)
+                runner.writer.add_image(name, origin_image, data_sample,
+                                        output, self.draw_gt, self.draw_pred)
--- a/mmengine/utils/misc.py
+++ b/mmengine/utils/misc.py
@@ -11,6 +11,8 @@ from inspect import getfullargspec
 from itertools import repeat
 from typing import Any, Callable, Optional, Sequence, Tuple, Type, Union

+import numpy as np
+import torch
 import torch.nn as nn

 from .parrots_wrapper import _BatchNorm, _InstanceNorm
@@ -433,3 +435,46 @@ def is_norm(layer: nn.Module,

    all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm)
    return isinstance(layer, all_norm_bases)
+
+
+def tensor2imgs(tensor: torch.Tensor,
+                mean: Optional[Tuple[float, float, float]] = None,
+                std: Optional[Tuple[float, float, float]] = None,
+                to_bgr: bool = True):
+    """Convert tensor to 3-channel images or 1-channel gray images.
+
+    Args:
+        tensor (torch.Tensor): Tensor that contains multiple images, with
+            shape (N, C, H, W). :math:`C` can be either 3 or 1. If C is 3,
+            the format should be RGB.
+        mean (tuple[float], optional): Mean of images. If None,
+            (0, 0, 0) will be used for 3-channel tensors and
+            (0, ) for 1-channel tensors. Defaults to None.
+        std (tuple[float], optional): Standard deviation of images. If None,
+            (1, 1, 1) will be used for 3-channel tensors and
+            (1, ) for 1-channel tensors. Defaults to None.
+        to_bgr (bool): Whether to convert a 3-channel tensor from RGB to
+            BGR. It must be False for a 1-channel tensor. Defaults to
+            True.
+
+    Returns:
+        list[np.ndarray]: A list that contains multiple images.
+    """
+
+    assert torch.is_tensor(tensor) and tensor.ndim == 4
+    channels = tensor.size(1)
+    assert channels in [1, 3]
+    if mean is None:
+        mean = (0, ) * channels
+    if std is None:
+        std = (1, ) * channels
+    assert (channels == len(mean) == len(std) == 3) or \
+        (channels == len(mean) == len(std) == 1 and not to_bgr)
+    mean = tensor.new_tensor(mean).view(1, -1)
+    std = tensor.new_tensor(std).view(1, -1)
+    tensor = tensor.permute(0, 2, 3, 1) * std + mean
+    imgs = tensor.detach().cpu().numpy()
+    if to_bgr and channels == 3:
+        imgs = imgs[:, :, :, (2, 1, 0)]  # RGB2BGR
+    imgs = [np.ascontiguousarray(img) for img in imgs]
+    return imgs
--- a/tests/test_hook/test_naive_visualization_hook.py
+++ b/tests/test_hook/test_naive_visualization_hook.py
@@ -0,0 +1,84 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from unittest.mock import Mock
+
+import torch
+
+from mmengine.data import BaseDataSample
+from mmengine.hooks import NaiveVisualizationHook
+
+
+class TestNaiveVisualizationHook:
+
+    def test_after_train_iter(self):
+        naive_visualization_hook = NaiveVisualizationHook()
+        Runner = Mock(iter=1)
+        Runner.writer.add_image = Mock()
+        inputs = torch.randn(1, 3, 15, 15)
+        # test with normalize, resize, pad
+        gt_datasamples = [
+            BaseDataSample(
+                metainfo=dict(
+                    img_norm_cfg=dict(
+                        mean=(0, 0, 0), std=(0.5, 0.5, 0.5), to_bgr=True),
+                    scale=(10, 10),
+                    pad_shape=(15, 15, 3),
+                    ori_height=5,
+                    ori_width=5,
+                    img_path='tmp.jpg'))
+        ]
+        pred_datasamples = [BaseDataSample()]
+        data_batch = (inputs, gt_datasamples)
+        naive_visualization_hook.after_test_iter(Runner, data_batch,
+                                                 pred_datasamples)
+        # test with resize, pad
+        gt_datasamples = [
+            BaseDataSample(
+                metainfo=dict(
+                    scale=(10, 10),
+                    pad_shape=(15, 15, 3),
+                    ori_height=5,
+                    ori_width=5,
+                    img_path='tmp.jpg')),
+        ]
+        pred_datasamples = [BaseDataSample()]
+        data_batch = (inputs, gt_datasamples)
+        naive_visualization_hook.after_test_iter(Runner, data_batch,
+                                                 pred_datasamples)
+        # test with only resize
+        gt_datasamples = [
+            BaseDataSample(
+                metainfo=dict(
+                    scale=(15, 15),
+                    ori_height=5,
+                    ori_width=5,
+                    img_path='tmp.jpg')),
+        ]
+        pred_datasamples = [BaseDataSample()]
+        data_batch = (inputs, gt_datasamples)
+        naive_visualization_hook.after_test_iter(Runner, data_batch,
+                                                 pred_datasamples)
+
+        # test with only pad
+        gt_datasamples = [
+            BaseDataSample(
+                metainfo=dict(
+                    pad_shape=(15, 15, 3),
+                    ori_height=5,
+                    ori_width=5,
+                    img_path='tmp.jpg')),
+        ]
+        pred_datasamples = [BaseDataSample()]
+        data_batch = (inputs, gt_datasamples)
+        naive_visualization_hook.after_test_iter(Runner, data_batch,
+                                                 pred_datasamples)
+
+        # test no transform
+        gt_datasamples = [
+            BaseDataSample(
+                metainfo=dict(ori_height=15, ori_width=15,
+                              img_path='tmp.jpg')),
+        ]
+        pred_datasamples = [BaseDataSample()]
+        data_batch = (inputs, gt_datasamples)
+        naive_visualization_hook.after_test_iter(Runner, data_batch,
+                                                 pred_datasamples)