Add dump_ocr_data

This commit is contained in:
gaotongxiao 2022-05-12 16:27:58 +00:00
parent 98bc90bd1c
commit f0c6d44ce8
3 changed files with 264 additions and 2 deletions

View File

@ -6,7 +6,7 @@ from .box_util import (bezier_to_polygon, is_on_same_line, sort_points,
from .check_argument import (equal_len, is_2dlist, is_3dlist, is_none_or_type,
is_type_list, valid_boundary)
from .collect_env import collect_env
from .data_convert_util import convert_annotations
from .data_convert_util import convert_annotations, dump_ocr_data
from .fileio import list_from_file, list_to_file
from .img_util import drop_orientation, is_not_png
from .lmdb_util import recog2lmdb
@ -22,5 +22,5 @@ __all__ = [
'convert_annotations', 'is_not_png', 'list_to_file', 'list_from_file',
'is_on_same_line', 'stitch_boxes_into_lines', 'StringStrip',
'revert_sync_batchnorm', 'bezier_to_polygon', 'sort_points',
'setup_multi_processes', 'recog2lmdb'
'setup_multi_processes', 'recog2lmdb', 'dump_ocr_data'
]

View File

@ -1,7 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Dict, Sequence
import mmcv
# TODO: Remove it when all converters no longer need it
def convert_annotations(image_infos, out_json_name):
"""Convert the annotation into coco style.
@ -40,3 +44,123 @@ def convert_annotations(image_infos, out_json_name):
mmcv.dump(out_json, out_json_name)
return out_json
def dump_ocr_data(image_infos: Sequence[Dict], out_json_name: str,
                  task_name: str) -> Dict:
    """Dump the annotation in openmmlab style.

    Args:
        image_infos (list): List of image information dicts. Read the example
            section for the format illustration.
        out_json_name (str): Output json filename.
        task_name (str): Task name. Options are 'textdet', 'textrecog' and
            'textspotter'.

    Examples:
        Here is the general structure of image_infos for textdet/textspotter
        tasks:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "height": 100,
                    "width": 200,
                    "segm_file": "seg.txt"  # (optional) path to segmap
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                        {
                            "iscrowd": 0,  # 0: don't ignore this instance
                                           # 1: ignore
                            "category_id": 0,  # Instance class id. Must be 0
                                               # for OCR tasks to permanently
                                               # be mapped to 'text' category
                            "bbox": [x, y, w, h],
                            "segmentation": [x1, y1, x2, y2, ...],
                            "text": "demo_text"  # for textspotter only.
                        }
                    ]
                },
            ]

        The input for textrecog task is much simpler:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                                    # However, in textrecog, usually each
                                    # image only has one text instance.
                        {
                            "text": "demo_text"
                        }
                    ]
                },
            ]

    Returns:
        out_json(dict): The openmmlab-style annotation.
    """
    task2dataset = {
        'textspotter': 'TextSpotterDataset',
        'textdet': 'TextDetDataset',
        'textrecog': 'TextRecogDataset'
    }

    assert isinstance(image_infos, list)
    assert isinstance(out_json_name, str)
    assert task_name in task2dataset

    dataset_type = task2dataset[task_name]

    out_json = dict(
        metainfo=dict(dataset_type=dataset_type, task_name=task_name),
        data_list=list())
    # Detection-style tasks carry a fixed single-category ('text') metainfo.
    if task_name in ['textdet', 'textspotter']:
        out_json['metainfo']['category'] = [dict(id=0, name='text')]

    for image_info in image_infos:

        single_info = dict(instances=list())
        single_info['img_path'] = image_info['file_name']
        if task_name in ['textdet', 'textspotter']:
            single_info['height'] = image_info['height']
            single_info['width'] = image_info['width']
            if 'segm_file' in image_info:
                single_info['seg_map'] = image_info['segm_file']

        anno_infos = image_info['anno_info']

        for anno_info in anno_infos:
            instance = {}
            if task_name in ['textrecog', 'textspotter']:
                instance['text'] = anno_info['text']
            if task_name in ['textdet', 'textspotter']:
                mask = anno_info['segmentation']
                # TODO: remove this if-branch when all converters have been
                # verified
                if len(mask) == 1 and len(mask[0]) > 1:
                    mask = mask[0]
                    # NOTE: the message strings end with explicit spaces so
                    # that the implicit concatenation reads correctly
                    # (previously rendered as "...singletext..." and
                    # "...now.Please...").
                    warnings.warn(
                        'Detected nested segmentation for a single '
                        'text instance, which should be a 1-d array now. '
                        'Please fix input accordingly.')
                instance['mask'] = mask
                x, y, w, h = anno_info['bbox']
                # Convert COCO-style [x, y, w, h] to [x1, y1, x2, y2].
                instance['bbox'] = [x, y, x + w, y + h]
                instance['bbox_label'] = anno_info['category_id']
                instance['ignore'] = anno_info['iscrowd'] == 1
            single_info['instances'].append(instance)

        out_json['data_list'].append(single_info)

    mmcv.dump(out_json, out_json_name)

    return out_json

View File

@ -0,0 +1,138 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
from unittest import TestCase
import mmcv
from mmocr.utils.data_convert_util import dump_ocr_data
class TestDataConvertUtil(TestCase):
    """Tests for :func:`dump_ocr_data` covering all three task flavours."""

    def _create_dummy_data(self):
        """Return a dummy input record plus the expected conversion results
        for the textdet, textspotter and textrecog tasks."""
        annotations = [
            dict(
                iscrowd=0,
                category_id=0,
                bbox=[0, 0, 10, 20],  # x, y, w, h
                text='t1',
                segmentation=[0, 0, 0, 10, 10, 20, 20, 0]),
            dict(
                iscrowd=1,
                category_id=0,
                bbox=[10, 10, 20, 20],  # x, y, w, h
                text='t2',
                segmentation=[10, 10, 10, 30, 30, 30, 30, 10]),
        ]
        img_infos = [
            dict(
                file_name='test.jpg',
                height=100,
                width=200,
                segm_file='seg.txt',
                anno_info=annotations)
        ]

        det_instances = [
            dict(
                bbox=[0, 0, 10, 20],
                bbox_label=0,
                mask=[0, 0, 0, 10, 10, 20, 20, 0],
                ignore=False),
            dict(
                bbox=[10, 10, 30, 30],  # x1, y1, x2, y2
                bbox_label=0,
                mask=[10, 10, 10, 30, 30, 30, 30, 10],
                ignore=True),
        ]
        det_target = dict(
            metainfo=dict(
                dataset_type='TextDetDataset',
                task_name='textdet',
                category=[dict(id=0, name='text')]),
            data_list=[
                dict(
                    img_path='test.jpg',
                    height=100,
                    width=200,
                    seg_map='seg.txt',
                    instances=det_instances)
            ])

        # The spotter output equals the det output plus per-instance text.
        spotter_instances = [
            dict(instance, text=text)
            for instance, text in zip(det_instances, ('t1', 't2'))
        ]
        spotter_target = dict(
            metainfo=dict(
                dataset_type='TextSpotterDataset',
                task_name='textspotter',
                category=[dict(id=0, name='text')]),
            data_list=[
                dict(
                    img_path='test.jpg',
                    height=100,
                    width=200,
                    seg_map='seg.txt',
                    instances=spotter_instances)
            ])

        recog_target = dict(
            metainfo=dict(
                dataset_type='TextRecogDataset', task_name='textrecog'),
            data_list=[
                dict(
                    img_path='test.jpg',
                    instances=[dict(text='t1'), dict(text='t2')])
            ])

        return img_infos, det_target, spotter_target, recog_target

    def test_dump_ocr_data(self):
        input_data, det_target, spotter_target, recog_target = \
            self._create_dummy_data()
        # Dump each task into the same temp file and compare the reloaded
        # json against the expected openmmlab-style annotation.
        task2target = dict(
            textdet=det_target,
            textspotter=spotter_target,
            textrecog=recog_target)
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = osp.join(tmpdir, 'ocr.json')
            for task, expected in task2target.items():
                dump_ocr_data(input_data, output_path, task)
                self.assertDictEqual(mmcv.load(output_path), expected)