[Feature] Add evaluator unittest (#46)

* add evaluator unittest

* update composed evaluator cfg

* add building function

* resolve comments

* resolve comments

* resolve comments

* fix evaluator doc
Yining Li 2022-03-01 12:00:19 +08:00 committed by GitHub
parent 7313173405
commit 42448425b3
2 changed files with 143 additions and 7 deletions


@@ -37,8 +37,8 @@ validation_cfg=dict(
```python
validation_cfg=dict(
    evaluator=[
-        dict(type='accuracy', top_k=1),  # use the classification accuracy evaluator
-        dict(type='f1_score')  # use the F1-score evaluator
+        dict(type='Accuracy', top_k=1),  # use the classification accuracy evaluator
+        dict(type='F1Score')  # use the F1-score evaluator
    ],
    main_metric='accuracy',
    interval=10,
@@ -51,8 +51,8 @@ validation_cfg=dict(
```python
validation_cfg=dict(
    evaluator=[
-        dict(type='accuracy', top_k=1, prefix='top1'),
-        dict(type='accuracy', top_k=5, prefix='top5')
+        dict(type='Accuracy', top_k=1, prefix='top1'),
+        dict(type='Accuracy', top_k=5, prefix='top5')
    ],
    main_metric='top1_accuracy',  # the prefix 'top1' is automatically added to the metric name to distinguish metrics with the same name
    interval=10,
@@ -62,7 +62,7 @@ validation_cfg=dict(

## Adding a custom evaluator

-The OpenMMLab algorithm libraries already implement the common evaluators of their respective fields. For example, MMDetection provides a COCO evaluator, and MMClassification provides evaluators such as accuracy and f1_score.
+The OpenMMLab algorithm libraries already implement the common evaluators of their respective fields. For example, MMDetection provides a COCO evaluator, and MMClassification provides evaluators such as Accuracy and F1Score.

Users can also add custom evaluators to meet their own needs. To implement a custom evaluator, inherit the evaluator base class [BaseEvaluator](Todo:baseevaluator-doc-link) provided by MMEngine and implement its abstract methods.
@@ -96,8 +96,8 @@ from mmengine.registry import EVALUATORS
import numpy as np

@EVALUATORS.register_module()
-class AccuracyEvaluator(BaseEvaluator):
+class Accuracy(BaseEvaluator):

    def process(self, data_samples: Dict, predictions: Dict):
        """Process one batch of data and predictions. The processed
        results should be stored in `self.results`, which will be used

@@ -0,0 +1,136 @@
# Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Dict, List, Optional
from unittest import TestCase

import numpy as np

from mmengine.evaluator import BaseEvaluator, ComposedEvaluator
from mmengine.registry import EVALUATORS

@EVALUATORS.register_module()
class ToyEvaluator(BaseEvaluator):
    """A toy evaluator that computes `accuracy` from predictions and labels,
    or returns the fixed metrics given in `dummy_metrics`."""

    def __init__(self,
                 collect_device: str = 'cpu',
                 dummy_metrics: Optional[Dict] = None):
        super().__init__(collect_device=collect_device)
        self.dummy_metrics = dummy_metrics

    def process(self, data_samples, predictions):
        result = {'pred': predictions['pred'], 'label': data_samples['label']}
        self.results.append(result)

    def compute_metrics(self, results: List):
        if self.dummy_metrics is not None:
            assert isinstance(self.dummy_metrics, dict)
            return self.dummy_metrics.copy()

        pred = np.concatenate([result['pred'] for result in results])
        label = np.concatenate([result['label'] for result in results])
        acc = (pred == label).sum() / pred.size

        metrics = {
            'accuracy': acc,
            'size': pred.size,  # To check the number of testing samples
        }
        return metrics

def generate_test_results(size, batch_size, pred, label):
    """Yield batches of (data_samples, predictions) filled with constant
    `pred` and `label` values, so the expected accuracy is known."""
    num_batch = math.ceil(size / batch_size)
    bs_residual = size % batch_size
    for i in range(num_batch):
        bs = bs_residual if i == num_batch - 1 else batch_size
        data_samples = {'label': np.full(bs, label)}
        predictions = {'pred': np.full(bs, pred)}
        yield (data_samples, predictions)

class TestBaseEvaluator(TestCase):

    def build_evaluator(self, cfg):
        if isinstance(cfg, (list, tuple)):
            evaluators = [EVALUATORS.build(_cfg) for _cfg in cfg]
            return ComposedEvaluator(evaluators=evaluators)
        else:
            return EVALUATORS.build(cfg)

    def test_single_evaluator(self):
        cfg = dict(type='ToyEvaluator')
        evaluator = self.build_evaluator(cfg)

        size = 10
        batch_size = 4

        for data_samples, predictions in generate_test_results(
                size, batch_size, pred=1, label=1):
            evaluator.process(data_samples, predictions)

        metrics = evaluator.evaluate(size=size)
        self.assertAlmostEqual(metrics['accuracy'], 1.0)
        self.assertEqual(metrics['size'], size)

        # Test empty results
        cfg = dict(type='ToyEvaluator', dummy_metrics=dict(accuracy=1.0))
        evaluator = self.build_evaluator(cfg)
        with self.assertWarnsRegex(UserWarning, 'got empty `self._results`.'):
            evaluator.evaluate(0)

    def test_composed_evaluator(self):
        cfg = [
            dict(type='ToyEvaluator'),
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
        ]
        evaluator = self.build_evaluator(cfg)

        size = 10
        batch_size = 4

        for data_samples, predictions in generate_test_results(
                size, batch_size, pred=1, label=1):
            evaluator.process(data_samples, predictions)

        metrics = evaluator.evaluate(size=size)
        self.assertAlmostEqual(metrics['accuracy'], 1.0)
        self.assertAlmostEqual(metrics['mAP'], 0.0)
        self.assertEqual(metrics['size'], size)

    def test_ambiguous_metric(self):
        # Two evaluators reporting the same metric name should trigger an
        # error when their results are merged.
        cfg = [
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0)),
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
        ]
        evaluator = self.build_evaluator(cfg)

        size = 10
        batch_size = 4

        for data_samples, predictions in generate_test_results(
                size, batch_size, pred=1, label=1):
            evaluator.process(data_samples, predictions)

        with self.assertRaisesRegex(
                ValueError,
                'There are multiple evaluators with the same metric name'):
            _ = evaluator.evaluate(size=size)

    def test_dataset_meta(self):
        dataset_meta = dict(classes=('cat', 'dog'))

        cfg = [
            dict(type='ToyEvaluator'),
            dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))
        ]

        evaluator = self.build_evaluator(cfg)
        evaluator.dataset_meta = dataset_meta

        for _evaluator in evaluator.evaluators:
            self.assertDictEqual(_evaluator.dataset_meta, dataset_meta)
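
The test file above can be run with the standard library runner (for example, `python -m unittest`). As a usage sketch of the workflow these tests exercise, assuming it executes in the same module as the `ToyEvaluator` and `generate_test_results` definitions above, building and driving a composed evaluator looks roughly like this:

```python
# Usage sketch: assumes the module above has been imported, so ToyEvaluator
# is registered in EVALUATORS and generate_test_results is available.
evaluators = [
    EVALUATORS.build(dict(type='ToyEvaluator')),
    EVALUATORS.build(dict(type='ToyEvaluator', dummy_metrics=dict(mAP=0.0))),
]
evaluator = ComposedEvaluator(evaluators=evaluators)

size, batch_size = 10, 4
for data_samples, predictions in generate_test_results(
        size, batch_size, pred=1, label=1):
    evaluator.process(data_samples, predictions)

# `evaluate` merges the metric dicts returned by each sub-evaluator.
metrics = evaluator.evaluate(size=size)
print(metrics)  # expected: {'accuracy': 1.0, 'size': 10, 'mAP': 0.0}
```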