mirror of
https://github.com/open-mmlab/mmengine.git
[Feature]: Add evaluator base class. (#41)
* [Feature]: Add evaluator base class.
* solve comments
* update
* fix
This commit is contained in:
parent 9437ebea67
commit d0bcb83e41
@@ -222,7 +222,9 @@ MMEngine registries support cross-project use, i.e. a module defined in one project can be used in
 - WEIGHT_INITIALIZERS: tools for weight initialization
 - OPTIMIZERS: all `optimizer`s in PyTorch as well as custom `optimizer`s
 - OPTIMIZER_CONSTRUCTORS: constructors of optimizers
+- PARAM_SCHEDULERS: various parameter schedulers, such as `MultiStepLR`
 - TASK_UTILS: task-specific components, such as `AnchorGenerator` and `BboxCoder`
+- EVALUATORS: evaluators used to validate model accuracy

 Below we take the OpenMMLab open-source projects as an example to show how to call modules across projects.
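As a concrete illustration of the new EVALUATORS entry above, here is a minimal sketch (not part of this commit) of how a downstream project could register its own evaluator and build it from a config dict. The class name `ToyEvaluator` and its metric key are hypothetical.

# Illustrative only: register a custom evaluator into the new EVALUATORS
# registry and build it from a config dict. `ToyEvaluator` is hypothetical.
from mmengine.evaluator import BaseEvaluator
from mmengine.registry import EVALUATORS


@EVALUATORS.register_module()
class ToyEvaluator(BaseEvaluator):

    def process(self, data_samples: dict, predictions: dict) -> None:
        # Keep one entry per processed batch.
        self.results.append(len(predictions))

    def compute_metrics(self, results: list) -> dict:
        return dict(num_batches=len(results))


evaluator = EVALUATORS.build(dict(type='ToyEvaluator'))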
mmengine/evaluator/__init__.py (new file, 6 lines)
@@ -0,0 +1,6 @@

# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseEvaluator
from .builder import build_evaluator
from .composed_evaluator import ComposedEvaluator

__all__ = ['BaseEvaluator', 'ComposedEvaluator', 'build_evaluator']
mmengine/evaluator/base.py (new file, 210 lines)
@@ -0,0 +1,210 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import pickle
import shutil
import tempfile
import warnings
from abc import ABCMeta, abstractmethod
from typing import Any, List, Optional, Union

import torch
import torch.distributed as dist

from mmengine.utils import mkdir_or_exist


class BaseEvaluator(metaclass=ABCMeta):
    """Base class for an evaluator.

    The evaluator first processes each batch of data_samples and
    predictions, and appends the processed results into the results list.
    Then it collects all results together from all ranks if distributed
    training is used. Finally, it computes the metrics of the entire dataset.

    Args:
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
    """

    def __init__(self, collect_device: str = 'cpu') -> None:
        self._dataset_meta: Union[None, dict] = None
        self.collect_device = collect_device
        self.results: List[Any] = []

        rank, world_size = get_dist_info()
        self.rank = rank
        self.world_size = world_size

    @property
    def dataset_meta(self) -> Optional[dict]:
        return self._dataset_meta

    @dataset_meta.setter
    def dataset_meta(self, dataset_meta: dict) -> None:
        self._dataset_meta = dataset_meta

    @abstractmethod
    def process(self, data_samples: dict, predictions: dict) -> None:
        """Process one batch of data samples and predictions. The processed
        results should be stored in ``self.results``, which will be used to
        compute the metrics when all batches have been processed.

        Args:
            data_samples (dict): The data samples from the dataset.
            predictions (dict): The output of the model.
        """

    @abstractmethod
    def compute_metrics(self, results: list) -> dict:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            dict: The computed metrics. The keys are the names of the
                metrics, and the values are the corresponding results.
        """

    def evaluate(self, size: int) -> dict:
        """Evaluate the model performance of the whole dataset after
        processing all batches.

        Args:
            size (int): Length of the entire validation dataset. When batch
                size > 1, the dataloader may pad some data samples to make
                sure all ranks have the same length of dataset slice. The
                ``collect_results`` function will drop the padded data based
                on this size.

        Returns:
            metrics (dict): Evaluation metrics dict on the val dataset. The
                keys are the names of the metrics, and the values are the
                corresponding results.
        """
        if len(self.results) == 0:
            warnings.warn(
                f'{self.__class__.__name__} got empty `self.results`. Please '
                'ensure that the processed results are properly added into '
                '`self.results` in the `process` method.')

        if self.world_size == 1:
            # non-distributed
            results = self.results
        else:
            results = collect_results(self.results, size, self.collect_device)

        if self.rank == 0:
            # TODO: replace with mmengine.dist.master_only
            metrics = [self.compute_metrics(results)]
        else:
            metrics = [None]  # type: ignore
        # TODO: replace with mmengine.dist.broadcast
        if self.world_size > 1:
            # broadcast_object_list fills `metrics` in place on non-zero ranks
            dist.broadcast_object_list(metrics)

        # reset the results list
        self.results.clear()
        return metrics[0]


# TODO: replace with mmengine.dist.get_dist_info
def get_dist_info():
    if dist.is_available() and dist.is_initialized():
        rank = dist.get_rank()
        world_size = dist.get_world_size()
    else:
        rank = 0
        world_size = 1
    return rank, world_size


# TODO: replace with mmengine.dist.collect_results
def collect_results(results, size, device='cpu'):
    """Collect results in distributed environments."""
    if device == 'gpu':
        return collect_results_gpu(results, size)
    elif device == 'cpu':
        return collect_results_cpu(results, size)
    else:
        raise NotImplementedError(
            f"device must be 'cpu' or 'gpu', but got {device}")


# TODO: replace with mmengine.dist.collect_results
def collect_results_cpu(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mkdir_or_exist('.dist_test')
            tmpdir = tempfile.mkdtemp(dir='.dist_test')
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    with open(osp.join(tmpdir, f'part_{rank}.pkl'), 'wb') as f:
        pickle.dump(result_part, f, protocol=2)
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            with open(osp.join(tmpdir, f'part_{i}.pkl'), 'rb') as f:
                part_list.append(pickle.load(f))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results


# TODO: replace with mmengine.dist.collect_results
def collect_results_gpu(result_part, size):
    rank, world_size = get_dist_info()
    # dump result part to tensor with pickle
    part_tensor = torch.tensor(
        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
    # gather all result part tensor shape
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # padding result part tensor to max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result part
    dist.all_gather(part_recv_list, part_send)

    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            part_list.append(
                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results
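For context, here is a hedged sketch of a concrete evaluator built on BaseEvaluator (not part of this commit). The class name `SimpleAccuracy` and the dict keys `label`/`pred_label` are assumptions for illustration.

# Illustrative subclass of BaseEvaluator; class name and dict keys are assumed.
from mmengine.evaluator import BaseEvaluator
from mmengine.registry import EVALUATORS


@EVALUATORS.register_module()
class SimpleAccuracy(BaseEvaluator):
    """Toy evaluator that counts exact matches between labels and predictions."""

    def process(self, data_samples: dict, predictions: dict) -> None:
        # Called once per batch; store only what compute_metrics needs.
        for gt, pred in zip(data_samples['label'], predictions['pred_label']):
            self.results.append(int(gt == pred))

    def compute_metrics(self, results: list) -> dict:
        # `results` holds the per-sample flags gathered from all ranks.
        return dict(accuracy=sum(results) / max(len(results), 1))

After feeding every validation batch through `process`, a single call to `evaluate(size=len(val_dataset))` gathers the per-rank results and returns the metrics dict on every rank.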
mmengine/evaluator/builder.py (new file, 16 lines)
@@ -0,0 +1,16 @@

# Copyright (c) OpenMMLab. All rights reserved.
from ..registry import EVALUATORS
from .composed_evaluator import ComposedEvaluator


def build_evaluator(cfg: dict) -> object:
    """Build function of evaluator.

    When the evaluator config is a list, it will automatically build composed
    evaluators.
    """
    if isinstance(cfg, list):
        evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
        return ComposedEvaluator(evaluators=evaluators)
    else:
        return EVALUATORS.build(cfg)
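A hedged usage sketch for `build_evaluator`, assuming the illustrative `ToyEvaluator` and `SimpleAccuracy` classes from the earlier sketches have been registered:

from mmengine.evaluator import ComposedEvaluator, build_evaluator

# A single config dict builds one evaluator from the EVALUATORS registry.
single = build_evaluator(dict(type='SimpleAccuracy'))

# A list of config dicts builds every entry and wraps them in a
# ComposedEvaluator.
composed = build_evaluator(
    [dict(type='SimpleAccuracy'),
     dict(type='ToyEvaluator')])
assert isinstance(composed, ComposedEvaluator)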
mmengine/evaluator/composed_evaluator.py (new file, 73 lines)
@@ -0,0 +1,73 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Union

from .base import BaseEvaluator


class ComposedEvaluator:
    """Wrapper class to compose multiple :class:`BaseEvaluator` instances.

    Args:
        evaluators (Sequence[BaseEvaluator]): The evaluators to compose.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
    """

    def __init__(self,
                 evaluators: Sequence[BaseEvaluator],
                 collect_device='cpu'):
        self._dataset_meta: Union[None, dict] = None
        self.collect_device = collect_device
        self.evaluators = evaluators

    @property
    def dataset_meta(self) -> Optional[dict]:
        return self._dataset_meta

    @dataset_meta.setter
    def dataset_meta(self, dataset_meta: dict) -> None:
        self._dataset_meta = dataset_meta
        for evaluator in self.evaluators:
            evaluator.dataset_meta = dataset_meta

    def process(self, data_samples: dict, predictions: dict):
        """Invoke the process method of each wrapped evaluator.

        Args:
            data_samples (dict): The data samples from the dataset.
            predictions (dict): The output of the model.
        """
        for evaluator in self.evaluators:
            evaluator.process(data_samples, predictions)

    def evaluate(self, size: int) -> dict:
        """Invoke the evaluate method of each wrapped evaluator and collect
        the metrics dict.

        Args:
            size (int): Length of the entire validation dataset. When batch
                size > 1, the dataloader may pad some data samples to make
                sure all ranks have the same length of dataset slice. The
                ``collect_results`` function will drop the padded data based
                on this size.

        Returns:
            metrics (dict): Evaluation metrics of all wrapped evaluators. The
                keys are the names of the metrics, and the values are the
                corresponding results.
        """
        metrics = {}
        for evaluator in self.evaluators:
            _metrics = evaluator.evaluate(size)

            # Check metric name conflicts
            for name in _metrics.keys():
                if name in metrics:
                    raise ValueError(
                        'There are multiple evaluators with the same metric '
                        f'name {name}')

            metrics.update(_metrics)
        return metrics
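To show how the pieces fit together, here is a hedged end-to-end sketch, again using the illustrative evaluators from the earlier sketches:

from mmengine.evaluator import build_evaluator

evaluator = build_evaluator(
    [dict(type='SimpleAccuracy'),
     dict(type='ToyEvaluator')])

# dataset_meta set on the wrapper is propagated to every wrapped evaluator.
evaluator.dataset_meta = dict(classes=('cat', 'dog'))

# One fake batch; in real use `process` is called once per validation batch.
evaluator.process(dict(label=[0, 1, 1]), dict(pred_label=[0, 1, 0]))

# Collects results (across ranks when distributed) and merges the metric
# dicts of the wrapped evaluators; duplicated metric names raise ValueError.
print(evaluator.evaluate(size=3))
# e.g. {'accuracy': 0.666..., 'num_batches': 1}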
mmengine/registry/__init__.py
@@ -1,6 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .registry import Registry, build_from_cfg
-from .root import (DATA_SAMPLERS, DATASETS, HOOKS, MODELS,
+from .root import (DATA_SAMPLERS, DATASETS, EVALUATORS, HOOKS, MODELS,
                    OPTIMIZER_CONSTRUCTORS, OPTIMIZERS, PARAM_SCHEDULERS,
                    RUNNER_CONSTRUCTORS, RUNNERS, TASK_UTILS, TRANSFORMS,
                    WEIGHT_INITIALIZERS)
@@ -8,5 +8,6 @@ from .root import (DATA_SAMPLERS, DATASETS, HOOKS, MODELS,
 __all__ = [
     'Registry', 'build_from_cfg', 'RUNNERS', 'RUNNER_CONSTRUCTORS', 'HOOKS',
     'DATASETS', 'DATA_SAMPLERS', 'TRANSFORMS', 'MODELS', 'WEIGHT_INITIALIZERS',
-    'OPTIMIZERS', 'OPTIMIZER_CONSTRUCTORS', 'TASK_UTILS', 'PARAM_SCHEDULERS'
+    'OPTIMIZERS', 'OPTIMIZER_CONSTRUCTORS', 'TASK_UTILS', 'PARAM_SCHEDULERS',
+    'EVALUATORS'
 ]
mmengine/registry/root.py
@@ -34,3 +34,6 @@ PARAM_SCHEDULERS = Registry('parameter scheduler')

 # manage task-specific modules like anchor generators and box coders
 TASK_UTILS = Registry('task util')
+
+# manage all kinds of evaluators for computing metrics
+EVALUATORS = Registry('evaluator')