[Feature] Add evaluator unittest (#46)
* add evaluator unittest
* update composed evaluator cfg
* add building function
* resolve comments
* resolve comments
* resolve comments
* fix evaluator doc
parent 7313173405
commit 42448425b3
````diff
@@ -37,8 +37,8 @@ validation_cfg=dict(
 ```python
 validation_cfg=dict(
     evaluator=[
-        dict(type='accuracy', top_k=1),  # use the classification accuracy evaluator
-        dict(type='f1_score')  # use the F1_score evaluator
+        dict(type='Accuracy', top_k=1),  # use the classification accuracy evaluator
+        dict(type='F1Score')  # use the F1_score evaluator
     ],
     main_metric='accuracy',
     interval=10,
````
````diff
@@ -51,8 +51,8 @@ validation_cfg=dict(
 ```python
 validation_cfg=dict(
     evaluator=[
-        dict(type='accuracy', top_k=1, prefix='top1'),
-        dict(type='accuracy', top_k=5, prefix='top5')
+        dict(type='Accuracy', top_k=1, prefix='top1'),
+        dict(type='Accuracy', top_k=5, prefix='top5')
     ],
     main_metric='top1_accuracy',  # the prefix 'top1' is automatically added to the metric name to distinguish metrics with the same name
     interval=10,
````
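For context, here is a minimal sketch of how a list-style `evaluator` config like the ones above can be assembled into a `ComposedEvaluator`. It mirrors the `build_evaluator` helper in the unit test added below; the registered names `Accuracy` and `F1Score` are assumptions borrowed from the documentation example, not types shipped with MMEngine itself.

```python
from mmengine.evaluator import ComposedEvaluator
from mmengine.registry import EVALUATORS


def build_evaluator(cfg):
    """Build a single evaluator or a ComposedEvaluator from config."""
    if isinstance(cfg, (list, tuple)):
        # A list of configs yields one evaluator per entry, wrapped in a
        # ComposedEvaluator that combines their metric dicts.
        evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
        return ComposedEvaluator(evaluators=evaluators)
    return EVALUATORS.build(cfg)


# Assumes 'Accuracy' and 'F1Score' are registered in EVALUATORS
# (e.g. by a downstream library); otherwise the registry lookup fails.
evaluator = build_evaluator([
    dict(type='Accuracy', top_k=1),
    dict(type='F1Score'),
])
```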
````diff
@@ -62,7 +62,7 @@ validation_cfg=dict(
 
 ## Add custom evaluators
 
-The OpenMMLab algorithm libraries already implement the evaluators commonly used in their respective fields. For example, MMDetection provides the COCO evaluator, and MMClassification provides evaluators such as accuracy and f1_score.
+The OpenMMLab algorithm libraries already implement the evaluators commonly used in their respective fields. For example, MMDetection provides the COCO evaluator, and MMClassification provides evaluators such as Accuracy and F1Score.
 
 Users can also add custom evaluators according to their own needs. To implement a custom evaluator, inherit the evaluator base class [BaseEvaluator](Todo:baseevaluator-doc-link) provided by MMEngine and implement the corresponding abstract methods.
 
````
````diff
@@ -96,8 +96,8 @@ from mmengine.registry import EVALUATORS
 import numpy as np
 
 @EVALUATORS.register_module()
-class AccuracyEvaluator(BaseEvaluator):
+class Accuracy(BaseEvaluator):
 
     def process(self, data_samples: Dict, predictions: Dict):
         """Process one batch of data and predictions. The processed
         results should be stored in `self.results`, which will be used
````
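The hunk above shows only the beginning of the documented example. As a rough, self-contained sketch of what a complete custom evaluator can look like, following the same `process`/`compute_metrics` pattern as the `ToyEvaluator` in the new unit test below (the class name and accuracy logic here are illustrative, not MMClassification's implementation):

```python
from typing import Dict, List

import numpy as np

from mmengine.evaluator import BaseEvaluator
from mmengine.registry import EVALUATORS


@EVALUATORS.register_module()
class SimpleAccuracy(BaseEvaluator):
    """Illustrative top-1 accuracy evaluator (name is hypothetical)."""

    def process(self, data_samples: Dict, predictions: Dict):
        # Store what is needed per batch; results are aggregated later.
        self.results.append({
            'pred': predictions['pred'],
            'label': data_samples['label'],
        })

    def compute_metrics(self, results: List) -> Dict:
        # Called once after all batches have been processed.
        pred = np.concatenate([res['pred'] for res in results])
        label = np.concatenate([res['label'] for res in results])
        return {'accuracy': float((pred == label).sum() / pred.size)}
```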
@@ -0,0 +1,136 @@ (new file added in this commit)
```python
# Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Dict, List, Optional
from unittest import TestCase

import numpy as np

from mmengine.evaluator import BaseEvaluator, ComposedEvaluator
from mmengine.registry import EVALUATORS


@EVALUATORS.register_module()
class ToyEvaluator(BaseEvaluator):

    def __init__(self,
                 collect_device: str = 'cpu',
                 dummy_metrics: Optional[Dict] = None):
        super().__init__(collect_device=collect_device)
        self.dummy_metrics = dummy_metrics

    def process(self, data_samples, predictions):
        result = {'pred': predictions['pred'], 'label': data_samples['label']}
        self.results.append(result)

    def compute_metrics(self, results: List):
        if self.dummy_metrics is not None:
            assert isinstance(self.dummy_metrics, dict)
            return self.dummy_metrics.copy()

        pred = np.concatenate([result['pred'] for result in results])
        label = np.concatenate([result['label'] for result in results])
        acc = (pred == label).sum() / pred.size

        metrics = {
            'accuracy': acc,
            'size': pred.size,  # To check the number of testing samples
        }

        return metrics


def generate_test_results(size, batch_size, pred, label):
    num_batch = math.ceil(size / batch_size)
    bs_residual = size % batch_size
    for i in range(num_batch):
        bs = bs_residual if i == num_batch - 1 else batch_size
        data_samples = {'label': np.full(bs, label)}
        predictions = {'pred': np.full(bs, pred)}
        yield (data_samples, predictions)


class TestBaseEvaluator(TestCase):

    def build_evaluator(self, cfg):
        if isinstance(cfg, (list, tuple)):
            evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
            return ComposedEvaluator(evaluators=evaluators)
        else:
            return EVALUATORS.build(cfg)

    def test_single_evaluator(self):
        cfg = dict(type='ToyEvaluator')
        evaluator = self.build_evaluator(cfg)

        size = 10
        batch_size = 4

        for data_samples, predictions in generate_test_results(
                size, batch_size, pred=1, label=1):
            evaluator.process(data_samples, predictions)

        metrics = evaluator.evaluate(size=size)
        self.assertAlmostEqual(metrics['accuracy'], 1.0)
        self.assertEqual(metrics['size'], size)

        # Test empty results
        cfg = dict(type='ToyEvaluator', dummy_metrics=dict(accuracy=1.0))
        evaluator = self.build_evaluator(cfg)
        with self.assertWarnsRegex(UserWarning, 'got empty `self._results`.'):
            evaluator.evaluate(0)

    def test_composed_evaluator(self):
        cfg = [
            dict(type='ToyEvaluator'),
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
        ]

        evaluator = self.build_evaluator(cfg)

        size = 10
        batch_size = 4

        for data_samples, predictions in generate_test_results(
                size, batch_size, pred=1, label=1):
            evaluator.process(data_samples, predictions)

        metrics = evaluator.evaluate(size=size)

        self.assertAlmostEqual(metrics['accuracy'], 1.0)
        self.assertAlmostEqual(metrics['mAP'], 0.0)
        self.assertEqual(metrics['size'], size)

    def test_ambiguate_metric(self):

        cfg = [
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0)),
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
        ]

        evaluator = self.build_evaluator(cfg)

        size = 10
        batch_size = 4

        for data_samples, predictions in generate_test_results(
                size, batch_size, pred=1, label=1):
            evaluator.process(data_samples, predictions)

        with self.assertRaisesRegex(
                ValueError,
                'There are multiple evaluators with the same metric name'):
            _ = evaluator.evaluate(size=size)

    def test_dataset_meta(self):
        dataset_meta = dict(classes=('cat', 'dog'))

        cfg = [
            dict(type='ToyEvaluator'),
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
        ]

        evaluator = self.build_evaluator(cfg)
        evaluator.dataset_meta = dataset_meta

        for _evaluator in evaluator.evaluators:
            self.assertDictEqual(_evaluator.dataset_meta, dataset_meta)
```