diff --git a/docs/en/datasets/det.md b/docs/en/datasets/det.md
index 51de1ab6..75637e8d 100644
--- a/docs/en/datasets/det.md
+++ b/docs/en/datasets/det.md
@@ -32,6 +32,10 @@ The structure of the text detection dataset directory is organized as follows.
 │   ├── imgs
 │   ├── instances_test.json
 │   └── instances_training.json
+├── CurvedSynText150k
+│   ├── syntext_word_eng
+│   ├── emcs_imgs
+│   └── instances_training.json
 ```

 |Dataset|Images| | Annotation Files | | |
@@ -43,6 +47,7 @@ The structure of the text detection dataset directory is organized as follows.
 | Synthtext | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | instances_training.lmdb ([data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb), [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb)) | - | - |
 | TextOCR | [homepage](https://textvqa.org/textocr/dataset) | - | - | - |
 | Totaltext | [homepage](https://github.com/cs-chan/Total-Text-Dataset) | - | - | - |
+| CurvedSynText150k | [homepage](https://github.com/aim-uofa/AdelaiDet/blob/master/datasets/README.md) \| [Part1](https://drive.google.com/file/d/1OSJ-zId2h3t_-I7g_wUkrK-VqQy153Kj/view?usp=sharing) \| [Part2](https://drive.google.com/file/d/1EzkcOlIgEp5wmEubvHb7-J5EImHExYgY/view?usp=sharing) | [instances_training.json](https://download.openmmlab.com/mmocr/data/curvedsyntext/instances_training.json) | - | - |

 ## Important Note
@@ -149,3 +154,27 @@ mv Polygon/Test ../annotations/test
 ```bash
 python tools/data/textdet/totaltext_converter.py /path/to/totaltext -o /path/to/totaltext --split-list training test
 ```
+
+### CurvedSynText150k
+
+- Step1: Download [syntext1.zip](https://drive.google.com/file/d/1OSJ-zId2h3t_-I7g_wUkrK-VqQy153Kj/view?usp=sharing) and [syntext2.zip](https://drive.google.com/file/d/1EzkcOlIgEp5wmEubvHb7-J5EImHExYgY/view?usp=sharing) to `CurvedSynText150k/`.
+- Step2:
+
+```bash
+unzip -q syntext1.zip
+mv train.json train1.json
+unzip images.zip
+rm images.zip
+
+unzip -q syntext2.zip
+mv train.json train2.json
+unzip images.zip
+rm images.zip
+```
+
+- Step3: Download [instances_training.json](https://download.openmmlab.com/mmocr/data/curvedsyntext/instances_training.json) to `CurvedSynText150k/`.
+- Or, generate `instances_training.json` with the following command:
+
+```bash
+python tools/data/common/curvedsyntext_converter.py PATH/TO/CurvedSynText150k --nproc 4
+```
diff --git a/tools/data/common/curvedsyntext_converter.py b/tools/data/common/curvedsyntext_converter.py
new file mode 100644
index 00000000..ddffd50e
--- /dev/null
+++ b/tools/data/common/curvedsyntext_converter.py
@@ -0,0 +1,129 @@
+# Copyright (c) OpenMMLab. All rights reserved.
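+#
+# Merges the two CurvedSynText150k splits (train1.json / syntext_word_eng and
+# train2.json / emcs_imgs) into a single COCO-format instances_training.json:
+# integer 'rec' labels are decoded back to text, and every Bezier-curve
+# annotation is sampled into polygon segmentation points.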
+import argparse
+import os.path as osp
+from functools import partial
+
+import mmcv
+import numpy as np
+
+from mmocr.utils import bezier_to_polygon, sort_points
+
+# The default 95-character dictionary used by CurvedSynText150k
+dict95 = [
+    ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
+    '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=',
+    '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
+    'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[',
+    '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
+    'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
+    'z', '{', '|', '}', '~'
+]
+UNK = len(dict95)
+EOS = UNK + 1
+
+
+def digit2text(rec):
+    """Decode a list of character indices into a text string."""
+    res = []
+    for d in rec:
+        assert d <= EOS
+        if d == EOS:
+            break
+        if d == UNK:
+            print('Warning: encountered an UNK character')
+            # Use a placeholder character that is not in the target dict
+            res.append('口')
+            continue
+        res.append(dict95[d])
+    return ''.join(res)
+
+
+def modify_annotation(ann, num_sample, start_img_id=0, start_ann_id=0):
+    ann['text'] = digit2text(ann.pop('rec'))
+    # Sample segmentation polygon points from the Bezier control points
+    polygon_pts = bezier_to_polygon(ann['bezier_pts'], num_sample=num_sample)
+    ann['segmentation'] = np.asarray(sort_points(polygon_pts)).reshape(
+        1, -1).tolist()
+    ann['image_id'] += start_img_id
+    ann['id'] += start_ann_id
+    return ann
+
+
+def modify_image_info(image_info, path_prefix, start_img_id=0):
+    image_info['file_name'] = osp.join(path_prefix, image_info['file_name'])
+    image_info['id'] += start_img_id
+    return image_info
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert CurvedSynText150k to COCO format')
+    parser.add_argument('root_path', help='CurvedSynText150k root path')
+    parser.add_argument('-o', '--out-dir', help='Output path')
+    parser.add_argument(
+        '-n',
+        '--num-sample',
+        type=int,
+        default=4,
+        help='Number of sample points on each Bezier curve.')
+    parser.add_argument(
+        '--nproc', default=1, type=int, help='Number of processes')
+    args = parser.parse_args()
+    return args
+
+
+def convert_annotations(data,
+                        path_prefix,
+                        num_sample,
+                        nproc,
+                        start_img_id=0,
+                        start_ann_id=0):
+    modify_image_info_with_params = partial(
+        modify_image_info, path_prefix=path_prefix, start_img_id=start_img_id)
+    modify_annotation_with_params = partial(
+        modify_annotation,
+        num_sample=num_sample,
+        start_img_id=start_img_id,
+        start_ann_id=start_ann_id)
+    if nproc > 1:
+        data['annotations'] = mmcv.track_parallel_progress(
+            modify_annotation_with_params, data['annotations'], nproc=nproc)
+        data['images'] = mmcv.track_parallel_progress(
+            modify_image_info_with_params, data['images'], nproc=nproc)
+    else:
+        data['annotations'] = mmcv.track_progress(
+            modify_annotation_with_params, data['annotations'])
+        data['images'] = mmcv.track_progress(
+            modify_image_info_with_params, data['images'])
+    data['categories'] = [{'id': 1, 'name': 'text'}]
+    return data
+
+
+def main():
+    args = parse_args()
+    root_path = args.root_path
+    out_dir = args.out_dir if args.out_dir else root_path
+    mmcv.mkdir_or_exist(out_dir)
+
+    anns = mmcv.load(osp.join(root_path, 'train1.json'))
+    data1 = convert_annotations(anns, 'syntext_word_eng', args.num_sample,
+                                args.nproc)
+
+    # Offset ids of the second split past the maximum ids in data1
+    start_img_id = max(data1['images'], key=lambda x: x['id'])['id'] + 1
+    start_ann_id = max(data1['annotations'], key=lambda x: x['id'])['id'] + 1
+    anns = mmcv.load(osp.join(root_path, 'train2.json'))
+    data2 = convert_annotations(
+        anns,
+        'emcs_imgs',
+        args.num_sample,
+        args.nproc,
+        start_img_id=start_img_id,
+        start_ann_id=start_ann_id)
+
+    data1['images'] += data2['images']
+    data1['annotations'] += data2['annotations']
+    mmcv.dump(data1, osp.join(out_dir, 'instances_training.json'))
+
+
+if __name__ == '__main__':
+    main()
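
After running the converter, the merged file can be sanity-checked by loading it and inspecting the fields the script writes. This is a minimal sketch, not part of the patch; it assumes only the COCO-style keys (`images`, `annotations`, `categories`) produced above and a default output location under `CurvedSynText150k/`:

```python
import mmcv

# Load the merged COCO-style file written by curvedsyntext_converter.py.
data = mmcv.load('CurvedSynText150k/instances_training.json')

# The converter registers a single 'text' category.
assert data['categories'] == [{'id': 1, 'name': 'text'}]

print(f"{len(data['images'])} images, {len(data['annotations'])} annotations")

# Each annotation carries the decoded text and a flattened polygon
# (x1, y1, x2, y2, ...) sampled from the original Bezier control points.
ann = data['annotations'][0]
print(ann['text'], len(ann['segmentation'][0]) // 2, 'polygon vertices')
```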