[Feature] Support metric prefix in BaseEvaluator (#82)

* support metric prefix in BaseEvaluator
* update docs
* use class attribute default_prefix

parent 824be950b9
commit cfccabc657
@@ -46,15 +46,17 @@ validation_cfg=dict(
 )
 ```
 
-When using multiple evaluators, metrics with the same name may occur. For example, the config below uses 2 classification accuracy evaluators with different parameters, and both of them report the metric accuracy. To avoid ambiguity in this case, each evaluator needs a `prefix` argument. The evaluator's `prefix` is automatically prepended to its metric names, so that homonymous metrics can be told apart.
+When using multiple evaluators, metrics with the same name may occur. For example, the config below uses 2 `COCOEvaluator` instances to evaluate bounding-box and keypoint predictions respectively, and both of them report metrics such as `AP` and `AR`. To avoid the ambiguity caused by homonymous metrics, `Evaluator` supports adding a prefix to metric names via the `prefix` argument. An `Evaluator` usually has a default prefix, and users can also specify one in the config file.
 
 ```python
 validation_cfg=dict(
     evaluator=[
-        dict(type='Accuracy', top_k=1, prefix='top1'),
-        dict(type='Accuracy', top_k=5, prefix='top5')
+        dict(type='COCO', iou_type='bbox'),  # use the default prefix `COCO`
+        dict(type='COCO', iou_type='keypoints', prefix='COCOKpts')  # use the custom prefix `COCOKpts`
     ],
-    main_metric='top1_accuracy',  # the prefix 'top1' is automatically prepended to the metric name to distinguish homonymous metrics
+    # use the metric `AP` under the prefix `COCO` as the main metric
+    # if there is no ambiguity between homonymous metrics, the prefix can be omitted here and only the metric name is needed
+    main_metric='COCO.AP',
     interval=10,
     by_epoch=True,
 )
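To make the naming scheme concrete, here is a minimal illustration of what the evaluation output of the two evaluators above might look like. The metric values are made-up placeholders; only the `prefix.metric_name` key pattern and the `main_metric` lookup follow the tutorial.

```python
# Hypothetical result dict produced by the two COCO evaluators above.
# Values are placeholders; only the key naming follows the tutorial.
metrics = {
    'COCO.AP': 0.512,      # from the bbox evaluator (default prefix `COCO`)
    'COCO.AR': 0.603,
    'COCOKpts.AP': 0.488,  # from the keypoint evaluator (custom prefix `COCOKpts`)
    'COCOKpts.AR': 0.559,
}

main_metric = 'COCO.AP'
# The prefix removes any ambiguity between the two `AP` entries.
print(metrics[main_metric])  # 0.512
```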
@@ -85,7 +87,9 @@ validation_cfg=dict(
 
 The `process()` method has 2 input arguments: the test data samples `data_samples` and the model predictions `predictions`. We extract the ground-truth category labels and the classification predictions from them, and store them in `self.results`.
 
 The `compute_metrics()` method has 1 input argument `results`, which holds the processed results of all batches of test data returned by `process()`. We extract the category labels and the classification predictions from it to compute the classification accuracy `acc`, and finally return the computed metrics as a dictionary.
 
+In addition, we recommend assigning a value to the class attribute `default_prefix` in the subclass. If `prefix` is not specified in the initialization arguments (i.e. in the config), `default_prefix` is automatically used as the prefix of the metric names. The evaluator's `default_prefix` value and all of its metrics should also be documented in the docstring.
+
 The concrete implementation is as follows:
 
@@ -97,6 +101,15 @@ import numpy as np
 
 @EVALUATORS.register_module()
 class Accuracy(BaseEvaluator):
+    """ Accuracy Evaluator
+
+    Default prefix: ACC
+
+    Metrics:
+        - accuracy: classification accuracy
+    """
+
+    default_prefix = 'ACC'
 
     def process(self, data_samples: Dict, predictions: Dict):
         """Process one batch of data and predictions. The processed
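The hunk above only shows the opening lines of the updated `Accuracy` example. For readability, here is a self-contained sketch of what the complete evaluator could look like, following the `process()`/`compute_metrics()` flow described in the tutorial. The `pred_label`/`gt_label` keys and the method bodies are illustrative assumptions, not the tutorial's exact code.

```python
from typing import Dict, List

import numpy as np

from mmengine.evaluator import BaseEvaluator
from mmengine.registry import EVALUATORS


@EVALUATORS.register_module()
class Accuracy(BaseEvaluator):
    """Accuracy Evaluator

    Default prefix: ACC

    Metrics:
        - accuracy: classification accuracy
    """

    default_prefix = 'ACC'

    def process(self, data_samples: Dict, predictions: Dict):
        """Store the labels and predicted labels of one batch in self.results."""
        self.results.append({
            'pred': predictions['pred_label'],  # assumed key name
            'gt': data_samples['gt_label'],     # assumed key name
        })

    def compute_metrics(self, results: List) -> dict:
        """Compute classification accuracy over all processed batches."""
        preds = np.concatenate([res['pred'] for res in results])
        gts = np.concatenate([res['gt'] for res in results])
        acc = float((preds == gts).mean())
        # evaluate() will prepend `ACC.` (or a user-specified prefix) to this key
        return dict(accuracy=acc)
```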
@@ -22,13 +22,24 @@ class BaseEvaluator(metaclass=ABCMeta):
     Then it collects all results together from all ranks if distributed
     training is used. Finally, it computes the metrics of the entire dataset.
 
+    A subclass of :class:`BaseEvaluator` should assign a meaningful value to the
+    class attribute `default_prefix`. See the argument `prefix` for details.
+
     Args:
         collect_device (str): Device name used for collecting results from
             different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
+        prefix (str, optional): The prefix that will be added in the metric
+            names to disambiguate homonymous metrics of different evaluators.
+            If prefix is not provided in the argument, self.default_prefix
+            will be used instead. Default: None
     """
 
-    def __init__(self, collect_device: str = 'cpu') -> None:
+    default_prefix: Optional[str] = None
+
+    def __init__(self,
+                 collect_device: str = 'cpu',
+                 prefix: Optional[str] = None) -> None:
         self._dataset_meta: Union[None, dict] = None
         self.collect_device = collect_device
         self.results: List[Any] = []
@@ -37,6 +48,11 @@ class BaseEvaluator(metaclass=ABCMeta):
         self.rank = rank
         self.world_size = world_size
 
+        self.prefix = prefix or self.default_prefix
+        if self.prefix is None:
+            warnings.warn('The prefix is not set in evaluator class '
+                          f'{self.__class__.__name__}.')
+
     @property
     def dataset_meta(self) -> Optional[dict]:
         return self._dataset_meta
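Taken together, the two hunks above resolve the prefix once at construction time: an explicit `prefix` argument wins, otherwise the class attribute `default_prefix` is used, and a missing value only triggers a warning. A standalone sketch of that resolution logic, purely for illustration (not part of the commit):

```python
import warnings
from typing import Optional


def resolve_prefix(prefix: Optional[str],
                   default_prefix: Optional[str],
                   class_name: str) -> Optional[str]:
    """Mirror the resolution done in BaseEvaluator.__init__ (sketch only)."""
    resolved = prefix or default_prefix
    if resolved is None:
        warnings.warn(f'The prefix is not set in evaluator class {class_name}.')
    return resolved


print(resolve_prefix(None, 'Toy', 'ToyEvaluator'))          # 'Toy' (falls back to default_prefix)
print(resolve_prefix('COCOKpts', 'COCO', 'COCOEvaluator'))  # 'COCOKpts' (explicit argument wins)
print(resolve_prefix(None, None, 'UnprefixedEvaluator'))    # None, and a UserWarning is emitted
```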
@@ -97,9 +113,17 @@ class BaseEvaluator(metaclass=ABCMeta):
 
         if self.rank == 0:
             # TODO: replace with mmengine.dist.master_only
-            metrics = [self.compute_metrics(results)]
+            metrics = self.compute_metrics(results)
+            # Add prefix to metric names
+            if self.prefix:
+                metrics = {
+                    '.'.join((self.prefix, k)): v
+                    for k, v in metrics.items()
+                }
+            metrics = [metrics]  # type: ignore
         else:
             metrics = [None]  # type: ignore
 
         # TODO: replace with mmengine.dist.broadcast
         if self.world_size > 1:
             metrics = dist.broadcast_object_list(metrics)
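The prefixing itself is just a key rewrite over the dict returned by `compute_metrics()`. A small standalone illustration of that step, assuming the `Toy` prefix used in the tests below:

```python
# Standalone illustration of the key-rewriting step added to evaluate() above.
prefix = 'Toy'
metrics = {'accuracy': 1.0, 'size': 10}

if prefix:
    metrics = {'.'.join((prefix, k)): v for k, v in metrics.items()}

print(metrics)  # {'Toy.accuracy': 1.0, 'Toy.size': 10} -- matching the updated test assertions
```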
@@ -6,7 +6,7 @@ from .base import BaseEvaluator
 
 
 class ComposedEvaluator:
-    """Wrapper class to compose multiple :class:`DatasetEvaluator` instances.
+    """Wrapper class to compose multiple :class:`BaseEvaluator` instances.
 
     Args:
         evaluators (Sequence[BaseEvaluator]): The evaluators to compose.
@@ -5,17 +5,35 @@ from unittest import TestCase
 
 import numpy as np
 
-from mmengine.evaluator import BaseEvaluator, ComposedEvaluator
+from mmengine.evaluator import BaseEvaluator, build_evaluator
 from mmengine.registry import EVALUATORS
 
 
 @EVALUATORS.register_module()
 class ToyEvaluator(BaseEvaluator):
+    """Evaluator that calculates the metric `accuracy` from predictions and
+    labels. Alternatively, this evaluator can return arbitrary dummy metrics
+    set in the config.
+
+    Default prefix: Toy
+
+    Metrics:
+        - accuracy (float): The classification accuracy. Only when
+          `dummy_metrics` is None.
+        - size (int): The number of test samples. Only when `dummy_metrics`
+          is None.
+
+    If `dummy_metrics` is set as a dict in the config, it will be
+    returned as the metrics and override `accuracy` and `size`.
+    """
+
+    default_prefix = 'Toy'
+
     def __init__(self,
                  collect_device: str = 'cpu',
+                 prefix: Optional[str] = None,
                  dummy_metrics: Optional[Dict] = None):
-        super().__init__(collect_device=collect_device)
+        super().__init__(collect_device=collect_device, prefix=prefix)
         self.dummy_metrics = dummy_metrics
 
     def process(self, data_samples, predictions):
@@ -39,6 +57,18 @@ class ToyEvaluator(BaseEvaluator):
         return metrics
 
 
+@EVALUATORS.register_module()
+class UnprefixedEvaluator(BaseEvaluator):
+    """Evaluator with unassigned `default_prefix` to test the warning
+    information."""
+
+    def process(self, data_samples: dict, predictions: dict) -> None:
+        pass
+
+    def compute_metrics(self, results: list) -> dict:
+        return dict(dummy=0.0)
+
+
 def generate_test_results(size, batch_size, pred, label):
     num_batch = math.ceil(size / batch_size)
     bs_residual = size % batch_size
@@ -51,16 +81,9 @@ def generate_test_results(size, batch_size, pred, label):
 
 class TestBaseEvaluator(TestCase):
 
-    def build_evaluator(self, cfg):
-        if isinstance(cfg, (list, tuple)):
-            evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
-            return ComposedEvaluator(evaluators=evaluators)
-        else:
-            return EVALUATORS.build(cfg)
-
     def test_single_evaluator(self):
         cfg = dict(type='ToyEvaluator')
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -70,12 +93,12 @@ class TestBaseEvaluator(TestCase):
         evaluator.process(data_samples, predictions)
 
         metrics = evaluator.evaluate(size=size)
-        self.assertAlmostEqual(metrics['accuracy'], 1.0)
-        self.assertEqual(metrics['size'], size)
+        self.assertAlmostEqual(metrics['Toy.accuracy'], 1.0)
+        self.assertEqual(metrics['Toy.size'], size)
 
         # Test empty results
         cfg = dict(type='ToyEvaluator', dummy_metrics=dict(accuracy=1.0))
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
        with self.assertWarnsRegex(UserWarning, 'got empty `self._results`.'):
             evaluator.evaluate(0)
 
@@ -85,7 +108,7 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -96,9 +119,9 @@ class TestBaseEvaluator(TestCase):
 
         metrics = evaluator.evaluate(size=size)
 
-        self.assertAlmostEqual(metrics['accuracy'], 1.0)
-        self.assertAlmostEqual(metrics['mAP'], 0.0)
-        self.assertEqual(metrics['size'], size)
+        self.assertAlmostEqual(metrics['Toy.accuracy'], 1.0)
+        self.assertAlmostEqual(metrics['Toy.mAP'], 0.0)
+        self.assertEqual(metrics['Toy.size'], size)
 
     def test_ambiguate_metric(self):
 
@@ -107,7 +130,7 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
 
         size = 10
         batch_size = 4
@@ -129,8 +152,14 @@ class TestBaseEvaluator(TestCase):
             dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
         ]
 
-        evaluator = self.build_evaluator(cfg)
+        evaluator = build_evaluator(cfg)
         evaluator.dataset_meta = dataset_meta
 
+        self.assertDictEqual(evaluator.dataset_meta, dataset_meta)
         for _evaluator in evaluator.evaluators:
             self.assertDictEqual(_evaluator.dataset_meta, dataset_meta)
+
+    def test_prefix(self):
+        cfg = dict(type='UnprefixedEvaluator')
+        with self.assertWarnsRegex(UserWarning, 'The prefix is not set'):
+            _ = build_evaluator(cfg)