[Enhancement] Support dynamic threshold range in eval_hmean (#962)

* [Enhancement] Support dynamic threshold range in eval_hmean

* upgrade textdetdataset, add deprecation warning
pull/967/head
Tong Gao 2022-04-22 17:07:36 +08:00 committed by GitHub
parent 5555916032
commit 888f700134
5 changed files with 72 additions and 13 deletions


@@ -30,4 +30,9 @@ data = dict(
         datasets=test_list,
         pipeline=test_pipeline_1333_736))
-evaluation = dict(interval=100, metric='hmean-iou')
+evaluation = dict(
+    interval=100,
+    metric='hmean-iou',
+    min_score_thr=0.1,
+    max_score_thr=0.5,
+    step=0.1)
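With the new keys, evaluation no longer happens at a single fixed boundary-score threshold: hmean is computed at every threshold from min_score_thr to max_score_thr (inclusive) in increments of step, and the best value is reported. A minimal sketch of the sweep this particular config requests, assuming the same np.arange-based expansion used in eval_hmean below:

import numpy as np

# Thresholds swept by evaluation = dict(min_score_thr=0.1, max_score_thr=0.5, step=0.1)
min_score_thr, max_score_thr, step = 0.1, 0.5, 0.1
thresholds = np.arange(min_score_thr, min(max_score_thr + step, 1.0), step)
print(np.round(thresholds, 2))  # [0.1 0.2 0.3 0.4 0.5]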


@@ -1,7 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import warnings
 from operator import itemgetter
 import mmcv
+import numpy as np
 from mmcv.utils import print_log
 import mmocr.utils as utils
@@ -78,11 +80,15 @@ def eval_hmean(results,
                img_infos,
                ann_infos,
                metrics={'hmean-iou'},
-               score_thr=0.3,
+               score_thr=None,
+               min_score_thr=0.3,
+               max_score_thr=0.9,
+               step=0.1,
                rank_list=None,
                logger=None,
                **kwargs):
-    """Evaluation in hmean metric.
+    """Evaluation in hmean metric. It conducts grid search over a range of
+    boundary score thresholds and reports the best result.
 
     Args:
         results (list[dict]): Each dict corresponds to one image,
@@ -91,7 +97,10 @@ def eval_hmean(results,
             containing the following keys: filename, height, width
         ann_infos (list[dict]): Each dict corresponds to one image,
             containing the following keys: masks, masks_ignore
-        score_thr (float): Score threshold of prediction map.
+        score_thr (float): Deprecated. Please use min_score_thr instead.
+        min_score_thr (float): Minimum score threshold of prediction map.
+        max_score_thr (float): Maximum score threshold of prediction map.
+        step (float): The spacing between score thresholds.
         metrics (set{str}): Hmean metric set, should be one or all of
             {'hmean-iou', 'hmean-ic13'}
     Returns:
@ -100,9 +109,21 @@ def eval_hmean(results,
assert utils.is_type_list(results, dict)
assert utils.is_type_list(img_infos, dict)
assert utils.is_type_list(ann_infos, dict)
if score_thr:
warnings.warn('score_thr is deprecated. Please use min_score_thr '
'instead.')
min_score_thr = score_thr
assert 0 <= min_score_thr <= max_score_thr <= 1
assert 0 <= step <= 1
assert len(results) == len(img_infos) == len(ann_infos)
assert isinstance(metrics, set)
min_score_thr = float(min_score_thr)
max_score_thr = float(max_score_thr)
step = float(step)
gts, gts_ignore = get_gt_masks(ann_infos)
preds = []
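Backward compatibility is kept through the shim above: any truthy score_thr is copied into min_score_thr and a warning is emitted, so old callers keep working with the sweep simply starting at their former threshold. A standalone sketch of that behaviour (the helper name resolve_thresholds is made up for illustration; in the patch the logic is inlined in eval_hmean):

import warnings

def resolve_thresholds(score_thr=None,
                       min_score_thr=0.3,
                       max_score_thr=0.9,
                       step=0.1):
    """Hypothetical helper mirroring the deprecation handling above."""
    if score_thr:  # truthy old-style argument overrides min_score_thr
        warnings.warn('score_thr is deprecated. Please use min_score_thr '
                      'instead.')
        min_score_thr = score_thr
    assert 0 <= min_score_thr <= max_score_thr <= 1
    assert 0 <= step <= 1
    return float(min_score_thr), float(max_score_thr), float(step)

# Old-style call: the sweep now effectively starts at 0.5.
print(resolve_thresholds(score_thr=0.5))  # (0.5, 0.9, 0.1)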
@@ -112,21 +133,20 @@ def eval_hmean(results,
         if len(texts) > 0:
             assert utils.valid_boundary(texts[0], False)
         valid_texts, valid_text_scores = filter_2dlist_result(
-            texts, scores, score_thr)
+            texts, scores, min_score_thr)
         preds.append(valid_texts)
         pred_scores.append(valid_text_scores)
 
     eval_results = {}
     for metric in metrics:
         msg = f'Evaluating {metric}...'
         if logger is None:
             msg = '\n' + msg
         print_log(msg, logger=logger)
         best_result = dict(hmean=-1)
-        for iter in range(3, 10):
-            thr = iter * 0.1
-            if thr < score_thr:
-                continue
+        for thr in np.arange(min_score_thr, min(max_score_thr + step, 1.0),
+                             step):
             top_preds = select_top_boundary(preds, pred_scores, thr)
             if metric == 'hmean-iou':
                 result, img_result = hmean_iou.eval_hmean_iou(
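The key behavioural change is in this loop: the hard-coded range(3, 10) sweep (thresholds 0.3 to 0.9, skipping anything below score_thr) becomes a configurable np.arange sweep, with min(max_score_thr + step, 1.0) making the upper bound inclusive while capping thresholds at 1. A self-contained sketch of the search pattern, using a toy hmean_at stand-in for the real per-threshold evaluation (hmean_iou.eval_hmean_iou / hmean_ic13.eval_hmean_ic13):

import numpy as np

def hmean_at(thr):
    # Toy stand-in: pretend hmean peaks at a boundary-score threshold of 0.4.
    return 1.0 - abs(thr - 0.4)

def grid_search_hmean(min_score_thr=0.3, max_score_thr=0.9, step=0.1):
    best_result = dict(hmean=-1, thr=None)
    # Same sweep as above: try each threshold, keep the best hmean.
    for thr in np.arange(min_score_thr, min(max_score_thr + step, 1.0), step):
        score = hmean_at(float(thr))
        if score > best_result['hmean']:
            best_result = dict(hmean=score, thr=float(thr))
    return best_result

print(grid_search_hmean())  # picks the swept threshold closest to 0.4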


@@ -138,7 +138,10 @@ class IcdarDataset(CocoDataset):
                  results,
                  metric='hmean-iou',
                  logger=None,
-                 score_thr=0.3,
+                 score_thr=None,
+                 min_score_thr=0.3,
+                 max_score_thr=0.9,
+                 step=0.1,
                  rank_list=None,
                  **kwargs):
         """Evaluate the hmean metric.
"""Evaluate the hmean metric.
@@ -148,6 +151,10 @@ class IcdarDataset(CocoDataset):
             metric (str | list[str]): Metrics to be evaluated.
             logger (logging.Logger | str | None): Logger used for printing
                 related information during evaluation. Default: None.
+            score_thr (float): Deprecated. Please use min_score_thr instead.
+            min_score_thr (float): Minimum score threshold of prediction map.
+            max_score_thr (float): Maximum score threshold of prediction map.
+            step (float): The spacing between score thresholds.
             rank_list (str): json file used to save eval result
                 of each image after ranking.
         Returns:
@@ -172,6 +179,9 @@ class IcdarDataset(CocoDataset):
             ann_infos,
             metrics=metrics,
             score_thr=score_thr,
+            min_score_thr=min_score_thr,
+            max_score_thr=max_score_thr,
+            step=step,
             logger=logger,
             rank_list=rank_list)
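For users nothing changes unless they opt in: evaluate still defaults to the 0.3-0.9 sweep, and custom ranges can be passed either here or through the evaluation dict in a config (first file above). A usage sketch, assuming dataset is an already-built IcdarDataset and results are detector outputs in the usual boundary_result format:

# Sweep boundary-score thresholds 0.3, 0.4, ..., 0.7 and report the best hmean.
eval_results = dataset.evaluate(
    results,
    metric='hmean-iou',
    min_score_thr=0.3,
    max_score_thr=0.7,
    step=0.1)
print(eval_results['hmean-iou:hmean'])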


@@ -80,7 +80,10 @@ class TextDetDataset(BaseDataset):
     def evaluate(self,
                  results,
                  metric='hmean-iou',
-                 score_thr=0.3,
+                 score_thr=None,
+                 min_score_thr=0.3,
+                 max_score_thr=0.9,
+                 step=0.1,
                  rank_list=None,
                  logger=None,
                  **kwargs):
@@ -89,7 +92,10 @@ class TextDetDataset(BaseDataset):
         Args:
             results (list): Testing results of the dataset.
             metric (str | list[str]): Metrics to be evaluated.
-            score_thr (float): Score threshold for prediction map.
+            score_thr (float): Deprecated. Please use min_score_thr instead.
+            min_score_thr (float): Minimum score threshold of prediction map.
+            max_score_thr (float): Maximum score threshold of prediction map.
+            step (float): The spacing between score thresholds.
             logger (logging.Logger | str | None): Logger used for printing
                 related information during evaluation. Default: None.
             rank_list (str): json file used to save eval result
@@ -116,6 +122,9 @@ class TextDetDataset(BaseDataset):
             ann_infos,
             metrics=metrics,
             score_thr=score_thr,
+            min_score_thr=min_score_thr,
+            max_score_thr=max_score_thr,
+            step=step,
             logger=logger,
             rank_list=rank_list)


@@ -151,6 +151,21 @@ def test_icdar_dataset():
         'boundary_result': []
     }]
     output = dataset.evaluate(results, metrics)
     assert output['hmean-iou:hmean'] == 1
     assert output['hmean-ic13:hmean'] == 1
+    results = [{
+        'boundary_result': [[50, 60, 70, 60, 70, 80, 50, 80, 0.5],
+                            [100, 120, 130, 120, 120, 150, 100, 150, 1]]
+    }, {
+        'boundary_result': []
+    }]
+    output = dataset.evaluate(
+        results, metrics, min_score_thr=0, max_score_thr=1, step=0.5)
+    assert output['hmean-iou:hmean'] == 1
+    assert output['hmean-ic13:hmean'] == 1
+    output = dataset.evaluate(
+        results, metrics, min_score_thr=0.6, max_score_thr=1, step=0.5)
+    assert output['hmean-iou:hmean'] == 1 / 1.5
+    assert output['hmean-ic13:hmean'] == 1 / 1.5
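The expected numbers follow from hmean being the harmonic mean (F1) of precision and recall. With min_score_thr=0.6 the prediction scored 0.5 never enters the sweep, so (assuming each prediction matches one of two annotated boxes, as the first pair of assertions implies) precision stays 1 while recall drops to 0.5, giving 2 * 1 * 0.5 / (1 + 0.5) = 1 / 1.5. A quick check:

def hmean(precision, recall):
    # Harmonic mean of precision and recall (the "hmean" asserted above).
    return 2 * precision * recall / (precision + recall)

assert hmean(1.0, 1.0) == 1.0                   # both predictions kept and matched
assert abs(hmean(1.0, 0.5) - 1 / 1.5) < 1e-9    # the 0.5-score box is filtered out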