[Feature] Support metric prefix in BaseEvaluator (#82)

* support metric prefix in BaseEvaluator

* update docs

* use class attribute default_prefix
Yining Li 2022-03-08 15:12:11 +08:00 committed by GitHub
parent 824be950b9
commit cfccabc657
4 changed files with 93 additions and 27 deletions


@@ -46,15 +46,17 @@ validation_cfg=dict(
 )
 ```
-When using multiple evaluators, different evaluators may produce metrics with the same name. For example, the configuration below uses two classification accuracy evaluators with different arguments, and both report the metric accuracy. In this case the `prefix` argument has to be set for each evaluator to avoid the ambiguity: the evaluator's `prefix` is automatically prepended to its metric names, so that homonymous metrics can be told apart.
+When using multiple evaluators, different evaluators may produce metrics with the same name. For example, the configuration below uses two `COCOEvaluator`s to evaluate the bounding box and keypoint predictions respectively, and both of them report metrics such as `AP` and `AR`. To avoid the ambiguity caused by homonymous metric names, `Evaluator` supports adding a prefix to the metric names via the `prefix` argument. An `Evaluator` usually has a default prefix, and the user can also specify one in the config file.
 ```python
 validation_cfg=dict(
     evaluator=[
-        dict(type='Accuracy', top_k=1, prefix='top1'),
-        dict(type='Accuracy', top_k=5, prefix='top5')
+        dict(type='COCO', iou_type='bbox'),  # use the default prefix `COCO`
+        dict(type='COCO', iou_type='keypoints', prefix='COCOKpts')  # use the custom prefix `COCOKpts`
     ],
-    main_metric='top1_accuracy',  # the prefix 'top1' is automatically added to the metric name to distinguish homonymous metrics
+    # use the metric `AP` with the prefix `COCO` as the main metric
+    # when there is no ambiguity between metric names, the prefix can be omitted here and only the metric name is needed
+    main_metric='COCO.AP',
     interval=10,
     by_epoch=True,
 )
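To make the effect of the prefixes concrete, the metrics returned for a config like the one above would carry their evaluator prefixes in the keys. A sketch with made-up values (not part of the diff):

```python
# Illustrative only: the metric names below follow the `<prefix>.<name>`
# pattern described above; the numeric values are made up.
metrics = {
    'COCO.AP': 0.512,        # from the bbox evaluator (default prefix `COCO`)
    'COCO.AR': 0.602,
    'COCOKpts.AP': 0.735,    # from the keypoint evaluator (custom prefix `COCOKpts`)
    'COCOKpts.AR': 0.771,
}
```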
@@ -85,7 +87,9 @@ validation_cfg=dict(
 The `process()` method takes 2 input arguments: the test data samples `data_samples` and the model predictions `predictions`. We extract the ground-truth category labels and the predicted classification results from them and store them in `self.results`.
 The `compute_metrics()` method takes 1 input argument `results`, which holds the results of all batches of test data processed by `process()`. The classification accuracy `acc` can be computed from the category labels and the predicted results stored there. Finally, the computed metrics are returned as a dictionary.
+In addition, we recommend assigning a value to the class attribute `default_prefix` in the subclass. If `prefix` is not specified in the initialization arguments (i.e. in the config), `default_prefix` is automatically used as the prefix of the metric names. The evaluator's `default_prefix` value and all of its metrics should also be documented in the docstring.
 The concrete implementation is as follows:
@@ -97,6 +101,15 @@ import numpy as np
 @EVALUATORS.register_module()
 class Accuracy(BaseEvaluator):
+    """Accuracy Evaluator.
+
+    Default prefix: ACC
+    Metrics:
+        - accuracy: classification accuracy
+    """
+
+    default_prefix = 'ACC'
+
     def process(self, data_samples: Dict, predictions: Dict):
         """Process one batch of data and predictions. The processed


@@ -22,13 +22,24 @@ class BaseEvaluator(metaclass=ABCMeta):
     Then it collects all results together from all ranks if distributed
     training is used. Finally, it computes the metrics of the entire dataset.
 
+    A subclass of :class:`BaseEvaluator` should assign a meaningful value
+    to the class attribute `default_prefix`. See the argument `prefix` for
+    details.
+
     Args:
         collect_device (str): Device name used for collecting results from
             different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
+        prefix (str, optional): The prefix that will be added in the metric
+            names to disambiguate homonymous metrics of different evaluators.
+            If prefix is not provided in the argument, self.default_prefix
+            will be used instead. Default: None
     """
 
-    def __init__(self, collect_device: str = 'cpu') -> None:
+    default_prefix: Optional[str] = None
+
+    def __init__(self,
+                 collect_device: str = 'cpu',
+                 prefix: Optional[str] = None) -> None:
         self._dataset_meta: Union[None, dict] = None
         self.collect_device = collect_device
         self.results: List[Any] = []
@@ -37,6 +48,11 @@ class BaseEvaluator(metaclass=ABCMeta):
         self.rank = rank
         self.world_size = world_size
 
+        self.prefix = prefix or self.default_prefix
+        if self.prefix is None:
+            warnings.warn('The prefix is not set in evaluator class '
+                          f'{self.__class__.__name__}.')
+
     @property
     def dataset_meta(self) -> Optional[dict]:
         return self._dataset_meta
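The resolution order introduced here is: an explicit `prefix` argument wins, otherwise the class attribute `default_prefix` is used, and if both are missing a warning is emitted. A small sketch of the three cases, using hypothetical `FooEvaluator`/`BarEvaluator` classes that are not part of this commit:

```python
from mmengine.evaluator import BaseEvaluator


class FooEvaluator(BaseEvaluator):
    """Hypothetical evaluator with a default prefix (illustration only)."""

    default_prefix = 'Foo'

    def process(self, data_samples, predictions):
        pass

    def compute_metrics(self, results):
        return dict(score=1.0)


class BarEvaluator(FooEvaluator):
    """Hypothetical evaluator without any prefix (illustration only)."""

    default_prefix = None


print(FooEvaluator().prefix)                # 'Foo'   -> falls back to default_prefix
print(FooEvaluator(prefix='MyFoo').prefix)  # 'MyFoo' -> explicit argument wins
print(BarEvaluator().prefix)                # None    -> emits the UserWarning above
```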
@@ -97,9 +113,17 @@ class BaseEvaluator(metaclass=ABCMeta):
         if self.rank == 0:
             # TODO: replace with mmengine.dist.master_only
-            metrics = [self.compute_metrics(results)]
+            metrics = self.compute_metrics(results)
+
+            # Add prefix to metric names
+            if self.prefix:
+                metrics = {
+                    '.'.join((self.prefix, k)): v
+                    for k, v in metrics.items()
+                }
+            metrics = [metrics]  # type: ignore
         else:
             metrics = [None]  # type: ignore
 
         # TODO: replace with mmengine.dist.broadcast
         if self.world_size > 1:
             metrics = dist.broadcast_object_list(metrics)
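The prefixing itself is a plain rewrite of the dictionary keys; the snippet below replays the comprehension above in isolation to show the resulting names (the values are arbitrary):

```python
prefix = 'ACC'
metrics = {'accuracy': 0.93, 'size': 100}

# Same transformation as in evaluate(): join prefix and metric name with '.'
prefixed = {'.'.join((prefix, k)): v for k, v in metrics.items()}
print(prefixed)  # {'ACC.accuracy': 0.93, 'ACC.size': 100}
```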


@@ -6,7 +6,7 @@ from .base import BaseEvaluator
 class ComposedEvaluator:
-    """Wrapper class to compose multiple :class:`DatasetEvaluator` instances.
+    """Wrapper class to compose multiple :class:`BaseEvaluator` instances.
 
     Args:
         evaluators (Sequence[BaseEvaluator]): The evaluators to compose.
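Only the docstring of `ComposedEvaluator` changes here, but for context: it merges the metric dicts of its child evaluators, and duplicate metric names are treated as an error (this is what `test_ambiguate_metric` below exercises, and what the prefixes are meant to prevent). A rough sketch of such a merge step, not the actual mmengine implementation:

```python
from typing import Dict, List


def merge_metrics(per_evaluator_metrics: List[Dict]) -> Dict:
    """Rough sketch: merge metrics from several evaluators while rejecting
    duplicate (ambiguous) metric names."""
    merged: Dict = {}
    for metrics in per_evaluator_metrics:
        for name, value in metrics.items():
            if name in merged:
                raise ValueError(
                    f'Ambiguous metric name {name!r}: returned by more than '
                    'one evaluator. Set distinct `prefix` values to '
                    'disambiguate.')
            merged[name] = value
    return merged
```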


@@ -5,17 +5,35 @@ from unittest import TestCase
 import numpy as np
 
-from mmengine.evaluator import BaseEvaluator, ComposedEvaluator
+from mmengine.evaluator import BaseEvaluator, build_evaluator
 from mmengine.registry import EVALUATORS
 
 
 @EVALUATORS.register_module()
 class ToyEvaluator(BaseEvaluator):
+    """Evaluator that calculates the metric `accuracy` from predictions and
+    labels. Alternatively, this evaluator can return arbitrary dummy metrics
+    set in the config.
+
+    Default prefix: Toy
+
+    Metrics:
+        - accuracy (float): The classification accuracy. Only when
+            `dummy_metrics` is None.
+        - size (int): The number of test samples. Only when `dummy_metrics`
+            is None.
+
+    If `dummy_metrics` is set as a dict in the config, it will be
+    returned as the metrics and override `accuracy` and `size`.
+    """
+
+    default_prefix = 'Toy'
+
     def __init__(self,
                  collect_device: str = 'cpu',
+                 prefix: Optional[str] = None,
                  dummy_metrics: Optional[Dict] = None):
-        super().__init__(collect_device=collect_device)
+        super().__init__(collect_device=collect_device, prefix=prefix)
         self.dummy_metrics = dummy_metrics
 
     def process(self, data_samples, predictions):
@@ -39,6 +57,18 @@ class ToyEvaluator(BaseEvaluator):
         return metrics
 
 
+@EVALUATORS.register_module()
+class UnprefixedEvaluator(BaseEvaluator):
+    """Evaluator with unassigned `default_prefix` to test the warning
+    information."""
+
+    def process(self, data_samples: dict, predictions: dict) -> None:
+        pass
+
+    def compute_metrics(self, results: list) -> dict:
+        return dict(dummy=0.0)
+
+
 def generate_test_results(size, batch_size, pred, label):
     num_batch = math.ceil(size / batch_size)
     bs_residual = size % batch_size
@@ -51,16 +81,9 @@ def generate_test_results(size, batch_size, pred, label):
 class TestBaseEvaluator(TestCase):
 
-    def build_evaluator(self, cfg):
-        if isinstance(cfg, (list, tuple)):
-            evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
-            return ComposedEvaluator(evaluators=evaluators)
-        else:
-            return EVALUATORS.build(cfg)
-
     def test_single_evaluator(self):
         cfg = dict(type='ToyEvaluator')
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
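The helper removed above is superseded by `mmengine.evaluator.build_evaluator`; presumably it behaves roughly like the deleted code, i.e. a single config builds one evaluator and a list of configs builds a `ComposedEvaluator`. A sketch of that assumption:

```python
# Sketch of what `build_evaluator` is assumed to do, mirroring the helper
# removed above (the real function lives in mmengine.evaluator).
from mmengine.evaluator import ComposedEvaluator
from mmengine.registry import EVALUATORS


def build_evaluator(cfg):
    if isinstance(cfg, (list, tuple)):
        # a list of evaluator configs yields a ComposedEvaluator
        evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
        return ComposedEvaluator(evaluators=evaluators)
    else:
        # a single config builds a single evaluator
        return EVALUATORS.build(cfg)
```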
@@ -70,12 +93,12 @@ class TestBaseEvaluator(TestCase):
             evaluator.process(data_samples, predictions)
 
         metrics = evaluator.evaluate(size=size)
-        self.assertAlmostEqual(metrics['accuracy'], 1.0)
-        self.assertEqual(metrics['size'], size)
+        self.assertAlmostEqual(metrics['Toy.accuracy'], 1.0)
+        self.assertEqual(metrics['Toy.size'], size)
 
         # Test empty results
         cfg = dict(type='ToyEvaluator', dummy_metrics=dict(accuracy=1.0))
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
         with self.assertWarnsRegex(UserWarning, 'got empty `self._results`.'):
             evaluator.evaluate(0)
@@ -85,7 +108,7 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -96,9 +119,9 @@ class TestBaseEvaluator(TestCase):
 
         metrics = evaluator.evaluate(size=size)
 
-        self.assertAlmostEqual(metrics['accuracy'], 1.0)
-        self.assertAlmostEqual(metrics['mAP'], 0.0)
-        self.assertEqual(metrics['size'], size)
+        self.assertAlmostEqual(metrics['Toy.accuracy'], 1.0)
+        self.assertAlmostEqual(metrics['Toy.mAP'], 0.0)
+        self.assertEqual(metrics['Toy.size'], size)
 
     def test_ambiguate_metric(self):
@@ -107,7 +130,7 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -129,8 +152,14 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
         evaluator.dataset_meta = dataset_meta
+        self.assertDictEqual(evaluator.dataset_meta, dataset_meta)
         for _evaluator in evaluator.evaluators:
             self.assertDictEqual(_evaluator.dataset_meta, dataset_meta)
 
+    def test_prefix(self):
+        cfg = dict(type='UnprefixedEvaluator')
+        with self.assertWarnsRegex(UserWarning, 'The prefix is not set'):
+            _ = build_evaluator(cfg)