# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase

import numpy as np
import sklearn.metrics
import torch
from mmengine.evaluator import Evaluator

from mmcls.structures import ClsDataSample
from mmcls.utils import register_all_modules
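
# Register all mmcls components (including the metric classes used below) so
# that the Evaluator can build them from their string `type` names.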
register_all_modules()


class TestVOCMultiLabel(TestCase):

    def test_evaluate(self):
        # prepare input data
        y_true_label = [[0], [1, 3], [0, 1, 2], [3]]
        y_true_difficult = [[0], [2], [1], []]
        y_pred_score = torch.tensor([
            [0.8, 0, 0, 0.6],
            [0.2, 0, 0.6, 0],
            [0, 0.9, 0.6, 0],
            [0, 0, 0.2, 0.3],
        ])

        # generate data samples
        pred = [
            ClsDataSample(num_classes=4).set_pred_score(i).set_gt_label(j)
            for i, j in zip(y_pred_score, y_true_label)
        ]
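        # VOC-style annotations may mark labels as "difficult"; the metric
        # reads them from the `gt_label_difficult` metainfo field set below.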
        for sample, difficult_label in zip(pred, y_true_difficult):
            sample.set_metainfo({'gt_label_difficult': difficult_label})

        # 1. Test with default argument
        evaluator = Evaluator(dict(type='VOCMultiLabelMetric'))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # generate sklearn input
        y_true = np.array([
            [1, 0, 0, 0],
            [0, 1, -1, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])
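        # The -1 entry marks the only position that is difficult but not a
        # ground-truth label; by default the metric ignores it. Zeroing that
        # position in both the targets and the 0.5-thresholded predictions
        # (`thr05_y_pred`) turns it into a true negative, which sklearn's
        # precision/recall/f1 likewise do not count.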
        ignored_index = y_true == -1
        y_true[ignored_index] = 0
        thr05_y_pred = np.array([
            [1, 0, 0, 1],
            [0, 0, 1, 0],
            [0, 1, 1, 0],
            [0, 0, 0, 0],
        ])
        thr05_y_pred[ignored_index] = 0

        expect_precision = sklearn.metrics.precision_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_recall = sklearn.metrics.recall_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_f1 = sklearn.metrics.f1_score(
            y_true, thr05_y_pred, average='macro') * 100
        self.assertEqual(res['multi-label/precision'], expect_precision)
        self.assertEqual(res['multi-label/recall'], expect_recall)
        # The f1-score is only compared approximately because torch and
        # sklearn differ in floating-point precision.
        self.assertAlmostEqual(res['multi-label/f1-score'], expect_f1, 5)

        # 2. Test with `difficult_as_positive`=False argument
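        # Here difficult labels that are not ground truth count as negatives
        # instead of being ignored, so `y_true` below has 0 where the default
        # case had -1.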
        evaluator = Evaluator(
            dict(type='VOCMultiLabelMetric', difficult_as_positive=False))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # generate sklearn input
        y_true = np.array([
            [1, 0, 0, 0],
            [0, 1, 0, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])
        thr05_y_pred = np.array([
            [1, 0, 0, 1],
            [0, 0, 1, 0],
            [0, 1, 1, 0],
            [0, 0, 0, 0],
        ])

        expect_precision = sklearn.metrics.precision_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_recall = sklearn.metrics.recall_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_f1 = sklearn.metrics.f1_score(
            y_true, thr05_y_pred, average='macro') * 100
        self.assertEqual(res['multi-label/precision'], expect_precision)
        self.assertEqual(res['multi-label/recall'], expect_recall)
        # The f1-score is only compared approximately because torch and
        # sklearn differ in floating-point precision.
        self.assertAlmostEqual(res['multi-label/f1-score'], expect_f1, 5)

        # 3. Test with `difficult_as_positive`=True argument
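        # Here difficult labels that are not ground truth count as positives,
        # so `y_true` below has 1 where the default case had -1.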
        evaluator = Evaluator(
            dict(type='VOCMultiLabelMetric', difficult_as_positive=True))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # generate sklearn input
        y_true = np.array([
            [1, 0, 0, 0],
            [0, 1, 1, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])
        thr05_y_pred = np.array([
            [1, 0, 0, 1],
            [0, 0, 1, 0],
            [0, 1, 1, 0],
            [0, 0, 0, 0],
        ])

        expect_precision = sklearn.metrics.precision_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_recall = sklearn.metrics.recall_score(
            y_true, thr05_y_pred, average='macro') * 100
        expect_f1 = sklearn.metrics.f1_score(
            y_true, thr05_y_pred, average='macro') * 100
        self.assertEqual(res['multi-label/precision'], expect_precision)
        self.assertEqual(res['multi-label/recall'], expect_recall)
        # The f1-score is only compared approximately because torch and
        # sklearn differ in floating-point precision.
        self.assertAlmostEqual(res['multi-label/f1-score'], expect_f1, 5)


class TestVOCAveragePrecision(TestCase):

    def test_evaluate(self):
        """Test using the metric in the same way as Evaluator."""
        # prepare input data
        y_true_difficult = [[0], [2], [1], []]
        y_pred_score = torch.tensor([
            [0.8, 0.1, 0, 0.6],
            [0.2, 0.2, 0.7, 0],
            [0.1, 0.9, 0.6, 0.1],
            [0, 0, 0.2, 0.3],
        ])
        y_true_label = [[0], [1, 3], [0, 1, 2], [3]]
        y_true = torch.tensor([
            [1, 0, 0, 0],
            [0, 1, 0, 1],
            [1, 1, 1, 0],
            [0, 0, 0, 1],
        ])

        # generate data samples
        pred = [
            ClsDataSample(num_classes=4).set_pred_score(i).set_gt_score(
                j).set_gt_label(k)
            for i, j, k in zip(y_pred_score, y_true, y_true_label)
        ]
        for sample, difficult_label in zip(pred, y_true_difficult):
            sample.set_metainfo({'gt_label_difficult': difficult_label})

        # 1. Test with default
        evaluator = Evaluator(dict(type='VOCAveragePrecision'))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # prepare inputs for sklearn for this case
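        # These are the per-class (transposed) scores and targets. Under the
        # default behaviour the difficult entry of class 2 (sample 1) is
        # ignored, so that class only has three entries.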
        y_pred_score = [[0.8, 0.2, 0.1, 0], [0.1, 0.2, 0.9, 0], [0, 0.6, 0.2],
                        [0.6, 0, 0.1, 0.3]]
        y_true = [[1, 0, 1, 0], [0, 1, 1, 0], [0, 1, 0], [0, 1, 0, 1]]
        expected_res = []
        for pred_per_class, gt_per_class in zip(y_pred_score, y_true):
            expected_res.append(
                sklearn.metrics.average_precision_score(
                    gt_per_class, pred_per_class))
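
        # mAP is the mean of the per-class average precisions, reported as a
        # percentage.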
        self.assertAlmostEqual(
            res['multi-label/mAP'],
            sum(expected_res) * 100 / len(expected_res),
            places=4)

        # 2. Test with `difficult_as_positive`=False argument
        evaluator = Evaluator(
            dict(type='VOCAveragePrecision', difficult_as_positive=False))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # prepare inputs for sklearn for this case
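        # The difficult entry of class 2 is now kept as a negative target.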
        y_pred_score = [[0.8, 0.2, 0.1, 0], [0.1, 0.2, 0.9, 0],
                        [0, 0.7, 0.6, 0.2], [0.6, 0, 0.1, 0.3]]
        y_true = [[1, 0, 1, 0], [0, 1, 1, 0], [0, 0, 1, 0], [0, 1, 0, 1]]
        expected_res = []
        for pred_per_class, gt_per_class in zip(y_pred_score, y_true):
            expected_res.append(
                sklearn.metrics.average_precision_score(
                    gt_per_class, pred_per_class))

        self.assertAlmostEqual(
            res['multi-label/mAP'],
            sum(expected_res) * 100 / len(expected_res),
            places=4)

        # 3. Test with `difficult_as_positive`=True argument
        evaluator = Evaluator(
            dict(type='VOCAveragePrecision', difficult_as_positive=True))
        evaluator.process(pred)
        res = evaluator.evaluate(4)
        self.assertIsInstance(res, dict)

        # prepare inputs for sklearn for this case
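        # The difficult entry of class 2 is now kept as a positive target.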
        y_pred_score = [[0.8, 0.2, 0.1, 0], [0.1, 0.2, 0.9, 0],
                        [0, 0.7, 0.6, 0.2], [0.6, 0, 0.1, 0.3]]
        y_true = [[1, 0, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 0, 1]]
        expected_res = []
        for pred_per_class, gt_per_class in zip(y_pred_score, y_true):
            expected_res.append(
                sklearn.metrics.average_precision_score(
                    gt_per_class, pred_per_class))

        self.assertAlmostEqual(
            res['multi-label/mAP'],
            sum(expected_res) * 100 / len(expected_res),
            places=4)