diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py
index 1b365f689..e6bae5344 100644
--- a/configs/_base_/datasets/ade20k.py
+++ b/configs/_base_/datasets/ade20k.py
@@ -43,3 +43,6 @@ val_dataloader = dict(
             seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/ade20k_640x640.py b/configs/_base_/datasets/ade20k_640x640.py
index 2392cd3e9..dbcbddf01 100644
--- a/configs/_base_/datasets/ade20k_640x640.py
+++ b/configs/_base_/datasets/ade20k_640x640.py
@@ -43,3 +43,6 @@ val_dataloader = dict(
             seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/chase_db1.py b/configs/_base_/datasets/chase_db1.py
index 17f39b7e1..5c1be3dd4 100644
--- a/configs/_base_/datasets/chase_db1.py
+++ b/configs/_base_/datasets/chase_db1.py
@@ -47,3 +47,6 @@ val_dataloader = dict(
             seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py
index 893652f88..51936fed7 100644
--- a/configs/_base_/datasets/cityscapes.py
+++ b/configs/_base_/datasets/cityscapes.py
@@ -42,3 +42,6 @@ val_dataloader = dict(
             img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/cityscapes_1024x1024.py b/configs/_base_/datasets/cityscapes_1024x1024.py
index 9bff81433..3c79537bf 100644
--- a/configs/_base_/datasets/cityscapes_1024x1024.py
+++ b/configs/_base_/datasets/cityscapes_1024x1024.py
@@ -20,3 +20,6 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/cityscapes_768x768.py b/configs/_base_/datasets/cityscapes_768x768.py
index 1eaa801a7..bb012a70a 100644
--- a/configs/_base_/datasets/cityscapes_768x768.py
+++ b/configs/_base_/datasets/cityscapes_768x768.py
@@ -20,3 +20,6 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/cityscapes_769x769.py b/configs/_base_/datasets/cityscapes_769x769.py
index be0b80c76..44a329ac9 100644
--- a/configs/_base_/datasets/cityscapes_769x769.py
+++ b/configs/_base_/datasets/cityscapes_769x769.py
@@ -20,3 +20,6 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/cityscapes_832x832.py b/configs/_base_/datasets/cityscapes_832x832.py
index 52dc58099..03fdc4d79 100644
--- a/configs/_base_/datasets/cityscapes_832x832.py
+++ b/configs/_base_/datasets/cityscapes_832x832.py
@@ -20,3 +20,6 @@ test_pipeline = [
 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/coco-stuff10k.py b/configs/_base_/datasets/coco-stuff10k.py
index ef824eba1..d36ed4752 100644
--- a/configs/_base_/datasets/coco-stuff10k.py
+++ b/configs/_base_/datasets/coco-stuff10k.py
@@ -44,3 +44,6 @@ val_dataloader = dict(
             img_path='images/test2014', seg_map_path='annotations/test2014'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/coco-stuff164k.py b/configs/_base_/datasets/coco-stuff164k.py
index 4b986045b..eb012eece 100644
--- a/configs/_base_/datasets/coco-stuff164k.py
+++ b/configs/_base_/datasets/coco-stuff164k.py
@@ -42,3 +42,6 @@ val_dataloader = dict(
             img_path='images/val2017', seg_map_path='annotations/val2017'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/drive.py b/configs/_base_/datasets/drive.py
index 00d17f51a..f90c90582 100644
--- a/configs/_base_/datasets/drive.py
+++ b/configs/_base_/datasets/drive.py
@@ -46,3 +46,6 @@ val_dataloader = dict(
             seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/hrf.py b/configs/_base_/datasets/hrf.py
index fd1b31156..43fc7a441 100644
--- a/configs/_base_/datasets/hrf.py
+++ b/configs/_base_/datasets/hrf.py
@@ -46,3 +46,6 @@ val_dataloader = dict(
             seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/isaid.py b/configs/_base_/datasets/isaid.py
index 4c7371591..cdf426761 100644
--- a/configs/_base_/datasets/isaid.py
+++ b/configs/_base_/datasets/isaid.py
@@ -49,3 +49,6 @@ val_dataloader = dict(
         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/loveda.py b/configs/_base_/datasets/loveda.py
index a31e0237a..349f008df 100644
--- a/configs/_base_/datasets/loveda.py
+++ b/configs/_base_/datasets/loveda.py
@@ -41,3 +41,6 @@ val_dataloader = dict(
         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/pascal_context.py b/configs/_base_/datasets/pascal_context.py
index 24c2a9b3b..d19d2f346 100644
--- a/configs/_base_/datasets/pascal_context.py
+++ b/configs/_base_/datasets/pascal_context.py
@@ -47,3 +47,6 @@ val_dataloader = dict(
         ann_file='ImageSets/SegmentationContext/val.txt',
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py
index 0db910a5e..42802e2ff 100644
--- a/configs/_base_/datasets/pascal_context_59.py
+++ b/configs/_base_/datasets/pascal_context_59.py
@@ -47,3 +47,6 @@ val_dataloader = dict(
         ann_file='ImageSets/SegmentationContext/val.txt',
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/pascal_voc12.py b/configs/_base_/datasets/pascal_voc12.py
index d4a6e8e95..63cf266de 100644
--- a/configs/_base_/datasets/pascal_voc12.py
+++ b/configs/_base_/datasets/pascal_voc12.py
@@ -44,3 +44,6 @@ val_dataloader = dict(
         ann_file='ImageSets/Segmentation/val.txt',
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/potsdam.py b/configs/_base_/datasets/potsdam.py
index 197e24957..70da17110 100644
--- a/configs/_base_/datasets/potsdam.py
+++ b/configs/_base_/datasets/potsdam.py
@@ -41,3 +41,6 @@ val_dataloader = dict(
         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/stare.py b/configs/_base_/datasets/stare.py
index 53e3acc35..f18db4889 100644
--- a/configs/_base_/datasets/stare.py
+++ b/configs/_base_/datasets/stare.py
@@ -46,3 +46,6 @@ val_dataloader = dict(
             seg_map_path='annotations/validation'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/configs/_base_/datasets/vaihingen.py b/configs/_base_/datasets/vaihingen.py
index 3e028d889..0a3f7e4ec 100644
--- a/configs/_base_/datasets/vaihingen.py
+++ b/configs/_base_/datasets/vaihingen.py
@@ -41,3 +41,6 @@ val_dataloader = dict(
         data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
         pipeline=test_pipeline))
 test_dataloader = val_dataloader
+
+val_evaluator = dict(type='IoUMetric', metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/mmseg/core/evaluation/__init__.py b/mmseg/core/evaluation/__init__.py
index 3d16d17e5..8b4bf03d6 100644
--- a/mmseg/core/evaluation/__init__.py
+++ b/mmseg/core/evaluation/__init__.py
@@ -1,11 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .class_names import get_classes, get_palette
-from .eval_hooks import DistEvalHook, EvalHook
-from .metrics import (eval_metrics, intersect_and_union, mean_dice,
-                      mean_fscore, mean_iou, pre_eval_to_metrics)
 
-__all__ = [
-    'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore',
-    'eval_metrics', 'get_classes', 'get_palette', 'pre_eval_to_metrics',
-    'intersect_and_union'
-]
+__all__ = ['get_classes', 'get_palette']
diff --git a/mmseg/core/evaluation/eval_hooks.py b/mmseg/core/evaluation/eval_hooks.py
deleted file mode 100644
index 952db3b0b..000000000
--- a/mmseg/core/evaluation/eval_hooks.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import os.path as osp
-import warnings
-
-import torch.distributed as dist
-from mmcv.runner import DistEvalHook as _DistEvalHook
-from mmcv.runner import EvalHook as _EvalHook
-from torch.nn.modules.batchnorm import _BatchNorm
-
-
-class EvalHook(_EvalHook):
-    """Single GPU EvalHook, with efficient test support.
-
-    Args:
-        by_epoch (bool): Determine perform evaluation by epoch or by iteration.
-            If set to True, it will perform by epoch. Otherwise, by iteration.
-            Default: False.
-        efficient_test (bool): Whether save the results as local numpy files to
-            save CPU memory during evaluation. Default: False.
-        pre_eval (bool): Whether to use progressive mode to evaluate model.
-            Default: False.
-    Returns:
-        list: The prediction results.
-    """
-
-    greater_keys = ['mIoU', 'mAcc', 'aAcc']
-
-    def __init__(self,
-                 *args,
-                 by_epoch=False,
-                 efficient_test=False,
-                 pre_eval=False,
-                 **kwargs):
-        super().__init__(*args, by_epoch=by_epoch, **kwargs)
-        self.pre_eval = pre_eval
-        if efficient_test:
-            warnings.warn(
-                'DeprecationWarning: ``efficient_test`` for evaluation hook '
-                'is deprecated, the evaluation hook is CPU memory friendly '
-                'with ``pre_eval=True`` as argument for ``single_gpu_test()`` '
-                'function')
-
-    def _do_evaluate(self, runner):
-        """perform evaluation and save ckpt."""
-        if not self._should_evaluate(runner):
-            return
-
-        from mmseg.apis import single_gpu_test
-        results = single_gpu_test(
-            runner.model, self.dataloader, show=False, pre_eval=self.pre_eval)
-        runner.log_buffer.clear()
-        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
-        key_score = self.evaluate(runner, results)
-        if self.save_best:
-            self._save_ckpt(runner, key_score)
-
-
-class DistEvalHook(_DistEvalHook):
-    """Distributed EvalHook, with efficient test support.
-
-    Args:
-        by_epoch (bool): Determine perform evaluation by epoch or by iteration.
-            If set to True, it will perform by epoch. Otherwise, by iteration.
-            Default: False.
-        efficient_test (bool): Whether save the results as local numpy files to
-            save CPU memory during evaluation. Default: False.
-        pre_eval (bool): Whether to use progressive mode to evaluate model.
-            Default: False.
-    Returns:
-        list: The prediction results.
-    """
-
-    greater_keys = ['mIoU', 'mAcc', 'aAcc']
-
-    def __init__(self,
-                 *args,
-                 by_epoch=False,
-                 efficient_test=False,
-                 pre_eval=False,
-                 **kwargs):
-        super().__init__(*args, by_epoch=by_epoch, **kwargs)
-        self.pre_eval = pre_eval
-        if efficient_test:
-            warnings.warn(
-                'DeprecationWarning: ``efficient_test`` for evaluation hook '
-                'is deprecated, the evaluation hook is CPU memory friendly '
-                'with ``pre_eval=True`` as argument for ``multi_gpu_test()`` '
-                'function')
-
-    def _do_evaluate(self, runner):
-        """perform evaluation and save ckpt."""
-        # Synchronization of BatchNorm's buffer (running_mean
-        # and running_var) is not supported in the DDP of pytorch,
-        # which may cause the inconsistent performance of models in
-        # different ranks, so we broadcast BatchNorm's buffers
-        # of rank 0 to other ranks to avoid this.
-        if self.broadcast_bn_buffer:
-            model = runner.model
-            for name, module in model.named_modules():
-                if isinstance(module,
-                              _BatchNorm) and module.track_running_stats:
-                    dist.broadcast(module.running_var, 0)
-                    dist.broadcast(module.running_mean, 0)
-
-        if not self._should_evaluate(runner):
-            return
-
-        tmpdir = self.tmpdir
-        if tmpdir is None:
-            tmpdir = osp.join(runner.work_dir, '.eval_hook')
-
-        from mmseg.apis import multi_gpu_test
-        results = multi_gpu_test(
-            runner.model,
-            self.dataloader,
-            tmpdir=tmpdir,
-            gpu_collect=self.gpu_collect,
-            pre_eval=self.pre_eval)
-
-        runner.log_buffer.clear()
-
-        if runner.rank == 0:
-            print('\n')
-            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
-            key_score = self.evaluate(runner, results)
-
-            if self.save_best:
-                self._save_ckpt(runner, key_score)
diff --git a/mmseg/core/evaluation/metrics.py b/mmseg/core/evaluation/metrics.py
deleted file mode 100644
index a1c0908e1..000000000
--- a/mmseg/core/evaluation/metrics.py
+++ /dev/null
@@ -1,395 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-from collections import OrderedDict
-
-import mmcv
-import numpy as np
-import torch
-
-
-def f_score(precision, recall, beta=1):
-    """calculate the f-score value.
-
-    Args:
-        precision (float | torch.Tensor): The precision value.
-        recall (float | torch.Tensor): The recall value.
-        beta (int): Determines the weight of recall in the combined score.
-            Default: False.
-
-    Returns:
-        [torch.tensor]: The f-score value.
-    """
-    score = (1 + beta**2) * (precision * recall) / (
-        (beta**2 * precision) + recall)
-    return score
-
-
-def intersect_and_union(pred_label,
-                        label,
-                        num_classes,
-                        ignore_index,
-                        label_map=dict(),
-                        reduce_zero_label=False):
-    """Calculate intersection and Union.
-
-    Args:
-        pred_label (ndarray | str): Prediction segmentation map
-            or predict result filename.
-        label (ndarray | str): Ground truth segmentation map
-            or label filename.
-        num_classes (int): Number of categories.
-        ignore_index (int): Index that will be ignored in evaluation.
-        label_map (dict): Mapping old labels to new labels. The parameter will
-            work only when label is str. Default: dict().
-        reduce_zero_label (bool): Whether ignore zero label. The parameter will
-            work only when label is str. Default: False.
-
-    Returns:
-        torch.Tensor: The intersection of prediction and ground truth
-            histogram on all classes.
-        torch.Tensor: The union of prediction and ground truth histogram on
-            all classes.
-        torch.Tensor: The prediction histogram on all classes.
-        torch.Tensor: The ground truth histogram on all classes.
-    """
-
-    if isinstance(pred_label, str):
-        pred_label = torch.from_numpy(np.load(pred_label))
-    else:
-        pred_label = torch.from_numpy((pred_label))
-
-    if isinstance(label, str):
-        label = torch.from_numpy(
-            mmcv.imread(label, flag='unchanged', backend='pillow'))
-    else:
-        label = torch.from_numpy(label)
-
-    if label_map is not None:
-        for old_id, new_id in label_map.items():
-            label[label == old_id] = new_id
-    if reduce_zero_label:
-        label[label == 0] = 255
-        label = label - 1
-        label[label == 254] = 255
-
-    mask = (label != ignore_index)
-    pred_label = pred_label[mask]
-    label = label[mask]
-
-    intersect = pred_label[pred_label == label]
-    area_intersect = torch.histc(
-        intersect.float(), bins=(num_classes), min=0, max=num_classes - 1)
-    area_pred_label = torch.histc(
-        pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1)
-    area_label = torch.histc(
-        label.float(), bins=(num_classes), min=0, max=num_classes - 1)
-    area_union = area_pred_label + area_label - area_intersect
-    return area_intersect, area_union, area_pred_label, area_label
-
-
-def total_intersect_and_union(results,
-                              gt_seg_maps,
-                              num_classes,
-                              ignore_index,
-                              label_map=dict(),
-                              reduce_zero_label=False):
-    """Calculate Total Intersection and Union.
-
-    Args:
-        results (list[ndarray] | list[str]): List of prediction segmentation
-            maps or list of prediction result filenames.
-        gt_seg_maps (list[ndarray] | list[str] | Iterables): list of ground
-            truth segmentation maps or list of label filenames.
-        num_classes (int): Number of categories.
-        ignore_index (int): Index that will be ignored in evaluation.
-        label_map (dict): Mapping old labels to new labels. Default: dict().
-        reduce_zero_label (bool): Whether ignore zero label. Default: False.
-
-    Returns:
-        ndarray: The intersection of prediction and ground truth histogram
-            on all classes.
-        ndarray: The union of prediction and ground truth histogram on all
-            classes.
-        ndarray: The prediction histogram on all classes.
-        ndarray: The ground truth histogram on all classes.
-    """
-    total_area_intersect = torch.zeros((num_classes, ), dtype=torch.float64)
-    total_area_union = torch.zeros((num_classes, ), dtype=torch.float64)
-    total_area_pred_label = torch.zeros((num_classes, ), dtype=torch.float64)
-    total_area_label = torch.zeros((num_classes, ), dtype=torch.float64)
-    for result, gt_seg_map in zip(results, gt_seg_maps):
-        area_intersect, area_union, area_pred_label, area_label = \
-            intersect_and_union(
-                result, gt_seg_map, num_classes, ignore_index,
-                label_map, reduce_zero_label)
-        total_area_intersect += area_intersect
-        total_area_union += area_union
-        total_area_pred_label += area_pred_label
-        total_area_label += area_label
-    return total_area_intersect, total_area_union, total_area_pred_label, \
-        total_area_label
-
-
-def mean_iou(results,
-             gt_seg_maps,
-             num_classes,
-             ignore_index,
-             nan_to_num=None,
-             label_map=dict(),
-             reduce_zero_label=False):
-    """Calculate Mean Intersection and Union (mIoU)
-
-    Args:
-        results (list[ndarray] | list[str]): List of prediction segmentation
-            maps or list of prediction result filenames.
-        gt_seg_maps (list[ndarray] | list[str]): list of ground truth
-            segmentation maps or list of label filenames.
-        num_classes (int): Number of categories.
-        ignore_index (int): Index that will be ignored in evaluation.
-        nan_to_num (int, optional): If specified, NaN values will be replaced
-            by the numbers defined by the user. Default: None.
-        label_map (dict): Mapping old labels to new labels. Default: dict().
-        reduce_zero_label (bool): Whether ignore zero label. Default: False.
-
-    Returns:
-        dict[str, float | ndarray]:
-            float: Overall accuracy on all images.
-            ndarray: Per category accuracy, shape (num_classes, ).
-            ndarray: Per category IoU, shape (num_classes, ).
-    """
-    iou_result = eval_metrics(
-        results=results,
-        gt_seg_maps=gt_seg_maps,
-        num_classes=num_classes,
-        ignore_index=ignore_index,
-        metrics=['mIoU'],
-        nan_to_num=nan_to_num,
-        label_map=label_map,
-        reduce_zero_label=reduce_zero_label)
-    return iou_result
-
-
-def mean_dice(results,
-              gt_seg_maps,
-              num_classes,
-              ignore_index,
-              nan_to_num=None,
-              label_map=dict(),
-              reduce_zero_label=False):
-    """Calculate Mean Dice (mDice)
-
-    Args:
-        results (list[ndarray] | list[str]): List of prediction segmentation
-            maps or list of prediction result filenames.
-        gt_seg_maps (list[ndarray] | list[str]): list of ground truth
-            segmentation maps or list of label filenames.
-        num_classes (int): Number of categories.
-        ignore_index (int): Index that will be ignored in evaluation.
-        nan_to_num (int, optional): If specified, NaN values will be replaced
-            by the numbers defined by the user. Default: None.
-        label_map (dict): Mapping old labels to new labels. Default: dict().
-        reduce_zero_label (bool): Whether ignore zero label. Default: False.
-
-    Returns:
-        dict[str, float | ndarray]: Default metrics.
-            float: Overall accuracy on all images.
-            ndarray: Per category accuracy, shape (num_classes, ).
-            ndarray: Per category dice, shape (num_classes, ).
-    """
-
-    dice_result = eval_metrics(
-        results=results,
-        gt_seg_maps=gt_seg_maps,
-        num_classes=num_classes,
-        ignore_index=ignore_index,
-        metrics=['mDice'],
-        nan_to_num=nan_to_num,
-        label_map=label_map,
-        reduce_zero_label=reduce_zero_label)
-    return dice_result
-
-
-def mean_fscore(results,
-                gt_seg_maps,
-                num_classes,
-                ignore_index,
-                nan_to_num=None,
-                label_map=dict(),
-                reduce_zero_label=False,
-                beta=1):
-    """Calculate Mean Intersection and Union (mIoU)
-
-    Args:
-        results (list[ndarray] | list[str]): List of prediction segmentation
-            maps or list of prediction result filenames.
-        gt_seg_maps (list[ndarray] | list[str]): list of ground truth
-            segmentation maps or list of label filenames.
-        num_classes (int): Number of categories.
-        ignore_index (int): Index that will be ignored in evaluation.
-        nan_to_num (int, optional): If specified, NaN values will be replaced
-            by the numbers defined by the user. Default: None.
-        label_map (dict): Mapping old labels to new labels. Default: dict().
-        reduce_zero_label (bool): Whether ignore zero label. Default: False.
-        beta (int): Determines the weight of recall in the combined score.
-            Default: False.
-
-
-    Returns:
-        dict[str, float | ndarray]: Default metrics.
-            float: Overall accuracy on all images.
-            ndarray: Per category recall, shape (num_classes, ).
-            ndarray: Per category precision, shape (num_classes, ).
-            ndarray: Per category f-score, shape (num_classes, ).
-    """
-    fscore_result = eval_metrics(
-        results=results,
-        gt_seg_maps=gt_seg_maps,
-        num_classes=num_classes,
-        ignore_index=ignore_index,
-        metrics=['mFscore'],
-        nan_to_num=nan_to_num,
-        label_map=label_map,
-        reduce_zero_label=reduce_zero_label,
-        beta=beta)
-    return fscore_result
-
-
-def eval_metrics(results,
-                 gt_seg_maps,
-                 num_classes,
-                 ignore_index,
-                 metrics=['mIoU'],
-                 nan_to_num=None,
-                 label_map=dict(),
-                 reduce_zero_label=False,
-                 beta=1):
-    """Calculate evaluation metrics
-    Args:
-        results (list[ndarray] | list[str]): List of prediction segmentation
-            maps or list of prediction result filenames.
-        gt_seg_maps (list[ndarray] | list[str] | Iterables): list of ground
-            truth segmentation maps or list of label filenames.
-        num_classes (int): Number of categories.
-        ignore_index (int): Index that will be ignored in evaluation.
-        metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'.
-        nan_to_num (int, optional): If specified, NaN values will be replaced
-            by the numbers defined by the user. Default: None.
-        label_map (dict): Mapping old labels to new labels. Default: dict().
-        reduce_zero_label (bool): Whether ignore zero label. Default: False.
-    Returns:
-        float: Overall accuracy on all images.
-        ndarray: Per category accuracy, shape (num_classes, ).
-        ndarray: Per category evaluation metrics, shape (num_classes, ).
-    """
-
-    total_area_intersect, total_area_union, total_area_pred_label, \
-        total_area_label = total_intersect_and_union(
-            results, gt_seg_maps, num_classes, ignore_index, label_map,
-            reduce_zero_label)
-    ret_metrics = total_area_to_metrics(total_area_intersect, total_area_union,
-                                        total_area_pred_label,
-                                        total_area_label, metrics, nan_to_num,
-                                        beta)
-
-    return ret_metrics
-
-
-def pre_eval_to_metrics(pre_eval_results,
-                        metrics=['mIoU'],
-                        nan_to_num=None,
-                        beta=1):
-    """Convert pre-eval results to metrics.
-
-    Args:
-        pre_eval_results (list[tuple[torch.Tensor]]): per image eval results
-            for computing evaluation metric
-        metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'.
-        nan_to_num (int, optional): If specified, NaN values will be replaced
-            by the numbers defined by the user. Default: None.
-    Returns:
-        float: Overall accuracy on all images.
-        ndarray: Per category accuracy, shape (num_classes, ).
-        ndarray: Per category evaluation metrics, shape (num_classes, ).
-    """
-
-    # convert list of tuples to tuple of lists, e.g.
-    # [(A_1, B_1, C_1, D_1), ..., (A_n, B_n, C_n, D_n)] to
-    # ([A_1, ..., A_n], ..., [D_1, ..., D_n])
-    pre_eval_results = tuple(zip(*pre_eval_results))
-    assert len(pre_eval_results) == 4
-
-    total_area_intersect = sum(pre_eval_results[0])
-    total_area_union = sum(pre_eval_results[1])
-    total_area_pred_label = sum(pre_eval_results[2])
-    total_area_label = sum(pre_eval_results[3])
-
-    ret_metrics = total_area_to_metrics(total_area_intersect, total_area_union,
-                                        total_area_pred_label,
-                                        total_area_label, metrics, nan_to_num,
-                                        beta)
-
-    return ret_metrics
-
-
-def total_area_to_metrics(total_area_intersect,
-                          total_area_union,
-                          total_area_pred_label,
-                          total_area_label,
-                          metrics=['mIoU'],
-                          nan_to_num=None,
-                          beta=1):
-    """Calculate evaluation metrics
-    Args:
-        total_area_intersect (ndarray): The intersection of prediction and
-            ground truth histogram on all classes.
-        total_area_union (ndarray): The union of prediction and ground truth
-            histogram on all classes.
-        total_area_pred_label (ndarray): The prediction histogram on all
-            classes.
-        total_area_label (ndarray): The ground truth histogram on all classes.
-        metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'.
-        nan_to_num (int, optional): If specified, NaN values will be replaced
-            by the numbers defined by the user. Default: None.
-    Returns:
-        float: Overall accuracy on all images.
-        ndarray: Per category accuracy, shape (num_classes, ).
-        ndarray: Per category evaluation metrics, shape (num_classes, ).
-    """
-    if isinstance(metrics, str):
-        metrics = [metrics]
-    allowed_metrics = ['mIoU', 'mDice', 'mFscore']
-    if not set(metrics).issubset(set(allowed_metrics)):
-        raise KeyError('metrics {} is not supported'.format(metrics))
-
-    all_acc = total_area_intersect.sum() / total_area_label.sum()
-    ret_metrics = OrderedDict({'aAcc': all_acc})
-    for metric in metrics:
-        if metric == 'mIoU':
-            iou = total_area_intersect / total_area_union
-            acc = total_area_intersect / total_area_label
-            ret_metrics['IoU'] = iou
-            ret_metrics['Acc'] = acc
-        elif metric == 'mDice':
-            dice = 2 * total_area_intersect / (
-                total_area_pred_label + total_area_label)
-            acc = total_area_intersect / total_area_label
-            ret_metrics['Dice'] = dice
-            ret_metrics['Acc'] = acc
-        elif metric == 'mFscore':
-            precision = total_area_intersect / total_area_pred_label
-            recall = total_area_intersect / total_area_label
-            f_value = torch.tensor(
-                [f_score(x[0], x[1], beta) for x in zip(precision, recall)])
-            ret_metrics['Fscore'] = f_value
-            ret_metrics['Precision'] = precision
-            ret_metrics['Recall'] = recall
-
-    ret_metrics = {
-        metric: value.numpy()
-        for metric, value in ret_metrics.items()
-    }
-    if nan_to_num is not None:
-        ret_metrics = OrderedDict({
-            metric: np.nan_to_num(metric_value, nan=nan_to_num)
-            for metric, metric_value in ret_metrics.items()
-        })
-    return ret_metrics
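
For reference, a minimal sketch (not part of the patch above) of the arithmetic that the removed total_area_to_metrics performed on the four per-class area histograms, and that the IoUMetric evaluator configured in the dataset files is expected to reproduce for 'mIoU'. The helper name and the toy pixel counts below are illustrative only.

import torch


def areas_to_metrics(area_intersect, area_union, area_pred_label, area_label):
    # Mirrors the formulas of the deleted total_area_to_metrics: aAcc is the
    # overall pixel accuracy; IoU, Dice and Acc are per-class vectors.
    return dict(
        aAcc=area_intersect.sum() / area_label.sum(),
        IoU=area_intersect / area_union,
        Dice=2 * area_intersect / (area_pred_label + area_label),
        Acc=area_intersect / area_label)


# Toy 3-class example (hypothetical pixel counts).
print(areas_to_metrics(
    area_intersect=torch.tensor([50., 30., 20.]),
    area_union=torch.tensor([70., 60., 40.]),
    area_pred_label=torch.tensor([60., 45., 30.]),
    area_label=torch.tensor([60., 45., 30.])))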