# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase

import numpy as np
import sklearn.metrics
import torch
from mmengine.evaluator import Evaluator

from mmcls.structures import ClsDataSample
from mmcls.utils import register_all_modules
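
# Register all mmcls modules so that the Evaluator below can build the VOC
# metrics from config dicts by their `type` name.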
register_all_modules()


class TestVOCMultiLabel(TestCase):

    def test_evaluate(self):
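        """Test using the metric in the same way as Evaluator."""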
        # prepare input data
        y_true_label = [[0], [1, 3], [0, 1, 2], [3]]
        y_true_difficult = [[0], [2], [1], []]
        y_pred_score = torch.tensor([
            [0.8, 0, 0, 0.6],
            [0.2, 0, 0.6, 0],
            [0, 0.9, 0.6, 0],
            [0, 0, 0.2, 0.3],
        ])
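        # Each entry of `y_true_difficult` lists the classes marked difficult
        # for that sample. A difficult class that is also a ground-truth
        # positive stays positive; otherwise its treatment depends on the
        # `difficult_as_positive` option exercised below.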

        # generate data samples
        pred = [
            ClsDataSample(num_classes=4).set_pred_score(i).set_gt_label(j)
            for i, j in zip(y_pred_score, y_true_label)
        ]
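        # The VOC metrics read the per-sample difficult classes from the
        # 'gt_label_difficult' metainfo field.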
        for sample, difficult_label in zip(pred, y_true_difficult):
            sample.set_metainfo({'gt_label_difficult': difficult_label})

        # 1. Test with default argument
        evaluator = Evaluator(dict(type='VOCMultiLabelMetric'))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # generate sklearn input
        y_true = np.array([
            [1, 0, 0, 0],
            [0, 1, -1, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])
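        # -1 marks the only entry that is difficult but not a ground-truth
        # positive (sample 1, class 2); with the default setting it is
        # excluded from both the target and the thresholded prediction.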
        ignored_index = y_true == -1
        y_true[ignored_index] = 0
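        # predicted labels obtained by thresholding the scores at the
        # default threshold of 0.5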
        thr05_y_pred = np.array([
            [1, 0, 0, 1],
            [0, 0, 1, 0],
            [0, 1, 1, 0],
            [0, 0, 0, 0],
        ])
        thr05_y_pred[ignored_index] = 0
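
        # Reference values are sklearn's macro-averaged scores, scaled to
        # percentages to match the metric's output.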
        expect_precision = sklearn.metrics.precision_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_recall = sklearn.metrics.recall_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_f1 = sklearn.metrics.f1_score(
            y_true, thr05_y_pred, average='macro') * 100
        self.assertEqual(res['multi-label/precision'], expect_precision)
        self.assertEqual(res['multi-label/recall'], expect_recall)
        # floating-point precision differs between torch and sklearn, so
        # compare the f1-score only approximately
        self.assertAlmostEqual(res['multi-label/f1-score'], expect_f1, 5)

        # 2. Test with `difficult_as_positive`=False argument
        evaluator = Evaluator(
            dict(type='VOCMultiLabelMetric', difficult_as_positive=False))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # generate sklearn input
        y_true = np.array([
            [1, 0, 0, 0],
            [0, 1, 0, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])
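        # with `difficult_as_positive`=False the difficult, non-positive
        # entry (sample 1, class 2) is counted as a negative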
        thr05_y_pred = np.array([
            [1, 0, 0, 1],
            [0, 0, 1, 0],
            [0, 1, 1, 0],
            [0, 0, 0, 0],
        ])

        expect_precision = sklearn.metrics.precision_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_recall = sklearn.metrics.recall_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_f1 = sklearn.metrics.f1_score(
            y_true, thr05_y_pred, average='macro') * 100
        self.assertEqual(res['multi-label/precision'], expect_precision)
        self.assertEqual(res['multi-label/recall'], expect_recall)
        # floating-point precision differs between torch and sklearn, so
        # compare the f1-score only approximately
        self.assertAlmostEqual(res['multi-label/f1-score'], expect_f1, 5)

        # 3. Test with `difficult_as_positive`=True argument
        evaluator = Evaluator(
            dict(type='VOCMultiLabelMetric', difficult_as_positive=True))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # generate sklearn input
        y_true = np.array([
            [1, 0, 0, 0],
            [0, 1, 1, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])
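        # with `difficult_as_positive`=True the same entry (sample 1,
        # class 2) is counted as a positive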
        thr05_y_pred = np.array([
            [1, 0, 0, 1],
            [0, 0, 1, 0],
            [0, 1, 1, 0],
            [0, 0, 0, 0],
        ])

        expect_precision = sklearn.metrics.precision_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_recall = sklearn.metrics.recall_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_f1 = sklearn.metrics.f1_score(
            y_true, thr05_y_pred, average='macro') * 100
        self.assertEqual(res['multi-label/precision'], expect_precision)
        self.assertEqual(res['multi-label/recall'], expect_recall)
        # floating-point precision differs between torch and sklearn, so
        # compare the f1-score only approximately
        self.assertAlmostEqual(res['multi-label/f1-score'], expect_f1, 5)


class TestVOCAveragePrecision(TestCase):

    def test_evaluate(self):
        """Test using the metric in the same way as Evaluator."""
        # prepare input data
        y_pred_score = torch.tensor([
            [0.8, 0.1, 0, 0.6],
            [0.2, 0.2, 0.7, 0],
            [0.1, 0.9, 0.6, 0.1],
            [0, 0, 0.2, 0.3],
        ])
        y_true_label = [[0], [1, 3], [0, 1, 2], [3]]
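        # one-hot ground-truth scores matching `y_true_label`; attached to
        # the data samples via `set_gt_score` below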
        y_true = torch.tensor([
            [1, 0, 0, 0],
            [0, 1, 0, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])
        y_true_difficult = [[0], [2], [1], []]

        # generate data samples
        pred = [
            ClsDataSample(num_classes=4).set_pred_score(i).set_gt_score(
                j).set_gt_label(k)
            for i, j, k in zip(y_pred_score, y_true, y_true_label)
        ]
        for sample, difficult_label in zip(pred, y_true_difficult):
            sample.set_metainfo({'gt_label_difficult': difficult_label})

        # 1. Test with default
        evaluator = Evaluator(dict(type='VOCAveragePrecision'))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # prepare inputs for sklearn for this case
        y_pred_score = [[0.8, 0.2, 0.1, 0], [0.1, 0.2, 0.9, 0], [0, 0.6, 0.2],
                        [0.6, 0, 0.1, 0.3]]
        y_true = [[1, 0, 1, 0], [0, 1, 1, 0], [0, 1, 0], [0, 1, 0, 1]]
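        # `y_pred_score` and `y_true` are laid out per class (columns of the
        # score matrix). Under the default setting, a sample whose difficult
        # entry is not a ground-truth positive is dropped for that class,
        # which is why class 2 only has three entries here.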
        expected_res = []
        for pred_per_class, gt_per_class in zip(y_pred_score, y_true):
            expected_res.append(
                sklearn.metrics.average_precision_score(
                    gt_per_class, pred_per_class))
        self.assertAlmostEqual(
            res['multi-label/mAP'],
            sum(expected_res) * 100 / len(expected_res),
            places=4)

        # 2. Test with `difficult_as_positive`=False argument
        evaluator = Evaluator(
            dict(type='VOCAveragePrecision', difficult_as_positive=False))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # prepare inputs for sklearn for this case
        y_pred_score = [[0.8, 0.2, 0.1, 0], [0.1, 0.2, 0.9, 0],
                        [0, 0.7, 0.6, 0.2], [0.6, 0, 0.1, 0.3]]
        y_true = [[1, 0, 1, 0], [0, 1, 1, 0], [0, 0, 1, 0], [0, 1, 0, 1]]
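        # with `difficult_as_positive`=False, sample 1 is kept for class 2
        # but counted as a negative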
        expected_res = []
        for pred_per_class, gt_per_class in zip(y_pred_score, y_true):
            expected_res.append(
                sklearn.metrics.average_precision_score(
                    gt_per_class, pred_per_class))
        self.assertAlmostEqual(
            res['multi-label/mAP'],
            sum(expected_res) * 100 / len(expected_res),
            places=4)

        # 3. Test with `difficult_as_positive`=True argument
        evaluator = Evaluator(
            dict(type='VOCAveragePrecision', difficult_as_positive=True))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # prepare inputs for sklearn for this case
        y_pred_score = [[0.8, 0.2, 0.1, 0], [0.1, 0.2, 0.9, 0],
                        [0, 0.7, 0.6, 0.2], [0.6, 0, 0.1, 0.3]]
        y_true = [[1, 0, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 0, 1]]
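        # with `difficult_as_positive`=True, sample 1 counts as a positive
        # for class 2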
        expected_res = []
        for pred_per_class, gt_per_class in zip(y_pred_score, y_true):
            expected_res.append(
                sklearn.metrics.average_precision_score(
                    gt_per_class, pred_per_class))
        self.assertAlmostEqual(
            res['multi-label/mAP'],
            sum(expected_res) * 100 / len(expected_res),
            places=4)