Add dump_ocr_data

This commit is contained in:
gaotongxiao 2022-05-12 16:27:58 +00:00
parent 98bc90bd1c
commit f0c6d44ce8
3 changed files with 264 additions and 2 deletions

View File

@ -6,7 +6,7 @@ from .box_util import (bezier_to_polygon, is_on_same_line, sort_points,
from .check_argument import (equal_len, is_2dlist, is_3dlist, is_none_or_type,
is_type_list, valid_boundary)
from .collect_env import collect_env
from .data_convert_util import convert_annotations
from .data_convert_util import convert_annotations, dump_ocr_data
from .fileio import list_from_file, list_to_file
from .img_util import drop_orientation, is_not_png
from .lmdb_util import recog2lmdb
@ -22,5 +22,5 @@ __all__ = [
'convert_annotations', 'is_not_png', 'list_to_file', 'list_from_file',
'is_on_same_line', 'stitch_boxes_into_lines', 'StringStrip',
'revert_sync_batchnorm', 'bezier_to_polygon', 'sort_points',
'setup_multi_processes', 'recog2lmdb'
'setup_multi_processes', 'recog2lmdb', 'dump_ocr_data'
]

View File

@ -1,7 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Dict, Sequence
import mmcv
# TODO: Remove it when all converters no longer need it
def convert_annotations(image_infos, out_json_name):
"""Convert the annotation into coco style.
@ -40,3 +44,123 @@ def convert_annotations(image_infos, out_json_name):
mmcv.dump(out_json, out_json_name)
return out_json
def dump_ocr_data(image_infos: Sequence[Dict], out_json_name: str,
                  task_name: str) -> Dict:
    """Dump the annotation in openmmlab style.

    Args:
        image_infos (list): List of image information dicts. Read the example
            section for the format illustration.
        out_json_name (str): Output json filename.
        task_name (str): Task name. Options are 'textdet', 'textrecog' and
            'textspotter'.

    Examples:
        Here is the general structure of image_infos for textdet/textspotter
        tasks:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "height": 100,
                    "width": 200,
                    "segm_file": "seg.txt"  # (optional) path to segmap
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                        {
                            "iscrowd": 0,  # 0: don't ignore this instance
                                           # 1: ignore
                            "category_id": 0,  # Instance class id. Must be 0
                                               # for OCR tasks to permanently
                                               # be mapped to 'text' category
                            "bbox": [x, y, w, h],
                            "segmentation": [x1, y1, x2, y2, ...],
                            "text": "demo_text"  # for textspotter only.
                        }
                    ]
                },
            ]

        The input for textrecog task is much simpler:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                                    # However, in textrecog, usually each
                                    # image only has one text instance.
                        {
                            "text": "demo_text"
                        }
                    ]
                },
            ]

    Returns:
        out_json(dict): The openmmlab-style annotation.
    """
    task2dataset = {
        'textspotter': 'TextSpotterDataset',
        'textdet': 'TextDetDataset',
        'textrecog': 'TextRecogDataset'
    }

    assert isinstance(image_infos, list)
    assert isinstance(out_json_name, str)
    assert task_name in task2dataset

    dataset_type = task2dataset[task_name]

    out_json = dict(
        metainfo=dict(dataset_type=dataset_type, task_name=task_name),
        data_list=list())
    # Detection-style tasks carry a fixed single-category ('text') metainfo.
    if task_name in ['textdet', 'textspotter']:
        out_json['metainfo']['category'] = [dict(id=0, name='text')]

    for image_info in image_infos:

        single_info = dict(instances=list())
        single_info['img_path'] = image_info['file_name']
        if task_name in ['textdet', 'textspotter']:
            single_info['height'] = image_info['height']
            single_info['width'] = image_info['width']
            if 'segm_file' in image_info:
                single_info['seg_map'] = image_info['segm_file']

        anno_infos = image_info['anno_info']

        for anno_info in anno_infos:
            instance = {}
            if task_name in ['textrecog', 'textspotter']:
                instance['text'] = anno_info['text']
            if task_name in ['textdet', 'textspotter']:
                mask = anno_info['segmentation']
                # TODO: remove this if-branch when all converters have been
                # verified
                if len(mask) == 1 and len(mask[0]) > 1:
                    mask = mask[0]
                    # NOTE: the message strings end with explicit spaces so
                    # that the implicit concatenation reads correctly
                    # (previously rendered as "...singletext..." and
                    # "...now.Please...").
                    warnings.warn(
                        'Detected nested segmentation for a single '
                        'text instance, which should be a 1-d array now. '
                        'Please fix input accordingly.')
                instance['mask'] = mask
                x, y, w, h = anno_info['bbox']
                # Convert COCO-style [x, y, w, h] to [x1, y1, x2, y2].
                instance['bbox'] = [x, y, x + w, y + h]
                instance['bbox_label'] = anno_info['category_id']
                instance['ignore'] = anno_info['iscrowd'] == 1
            single_info['instances'].append(instance)

        out_json['data_list'].append(single_info)

    mmcv.dump(out_json, out_json_name)

    return out_json

View File

@ -0,0 +1,138 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
from unittest import TestCase
import mmcv
from mmocr.utils.data_convert_util import dump_ocr_data
class TestDataConvertUtil(TestCase):
    """Tests for :func:`dump_ocr_data` covering all three task flavours."""

    def _create_dummy_data(self):
        """Return a dummy input record plus the expected conversion results
        for the textdet, textspotter and textrecog tasks."""
        annotations = [
            dict(
                iscrowd=0,
                category_id=0,
                bbox=[0, 0, 10, 20],  # x, y, w, h
                text='t1',
                segmentation=[0, 0, 0, 10, 10, 20, 20, 0]),
            dict(
                iscrowd=1,
                category_id=0,
                bbox=[10, 10, 20, 20],  # x, y, w, h
                text='t2',
                segmentation=[10, 10, 10, 30, 30, 30, 30, 10]),
        ]
        img_infos = [
            dict(
                file_name='test.jpg',
                height=100,
                width=200,
                segm_file='seg.txt',
                anno_info=annotations)
        ]

        det_instances = [
            dict(
                bbox=[0, 0, 10, 20],
                bbox_label=0,
                mask=[0, 0, 0, 10, 10, 20, 20, 0],
                ignore=False),
            dict(
                bbox=[10, 10, 30, 30],  # x1, y1, x2, y2
                bbox_label=0,
                mask=[10, 10, 10, 30, 30, 30, 30, 10],
                ignore=True),
        ]
        det_target = dict(
            metainfo=dict(
                dataset_type='TextDetDataset',
                task_name='textdet',
                category=[dict(id=0, name='text')]),
            data_list=[
                dict(
                    img_path='test.jpg',
                    height=100,
                    width=200,
                    seg_map='seg.txt',
                    instances=det_instances)
            ])

        # The spotter output equals the det output plus per-instance text.
        spotter_instances = [
            dict(instance, text=text)
            for instance, text in zip(det_instances, ('t1', 't2'))
        ]
        spotter_target = dict(
            metainfo=dict(
                dataset_type='TextSpotterDataset',
                task_name='textspotter',
                category=[dict(id=0, name='text')]),
            data_list=[
                dict(
                    img_path='test.jpg',
                    height=100,
                    width=200,
                    seg_map='seg.txt',
                    instances=spotter_instances)
            ])

        recog_target = dict(
            metainfo=dict(
                dataset_type='TextRecogDataset', task_name='textrecog'),
            data_list=[
                dict(
                    img_path='test.jpg',
                    instances=[dict(text='t1'), dict(text='t2')])
            ])

        return img_infos, det_target, spotter_target, recog_target

    def test_dump_ocr_data(self):
        input_data, det_target, spotter_target, recog_target = \
            self._create_dummy_data()
        # Dump each task into the same temp file and compare the reloaded
        # json against the expected openmmlab-style annotation.
        task2target = dict(
            textdet=det_target,
            textspotter=spotter_target,
            textrecog=recog_target)
        with tempfile.TemporaryDirectory() as tmpdir:
            output_path = osp.join(tmpdir, 'ocr.json')
            for task, expected in task2target.items():
                dump_ocr_data(input_data, output_path, task)
                self.assertDictEqual(mmcv.load(output_path), expected)