diff --git a/configs/textdet/_base_/datasets/synthtext.py b/configs/textdet/_base_/datasets/synthtext.py index 09c60000..9b2310c3 100644 --- a/configs/textdet/_base_/datasets/synthtext.py +++ b/configs/textdet/_base_/datasets/synthtext.py @@ -1,17 +1,8 @@ -synthtext_textdet_data_root = 'data/det/synthtext' +synthtext_textdet_data_root = 'data/synthtext' synthtext_textdet_train = dict( type='OCRDataset', data_root=synthtext_textdet_data_root, - ann_file='instances_training.json', - data_prefix=dict(img_path='imgs/'), + ann_file='textdet_train.json', filter_cfg=dict(filter_empty_gt=True, min_size=32), pipeline=None) - -synthtext_textdet_test = dict( - type='OCRDataset', - data_root=synthtext_textdet_data_root, - ann_file='instances_test.json', - data_prefix=dict(img_path='imgs/'), - test_mode=True, - pipeline=None) diff --git a/configs/textrecog/_base_/datasets/synthtext.py b/configs/textrecog/_base_/datasets/synthtext.py index 18a44a23..94fc3049 100644 --- a/configs/textrecog/_base_/datasets/synthtext.py +++ b/configs/textrecog/_base_/datasets/synthtext.py @@ -1,25 +1,19 @@ -synthtext_textrecog_data_root = 'data/rec/SynthText/' +synthtext_textrecog_data_root = 'data/synthtext' synthtext_textrecog_train = dict( type='OCRDataset', data_root=synthtext_textrecog_data_root, - data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'), - ann_file='train_labels.json', - test_mode=False, - pipeline=None) - -synthtext_an_textrecog_train = dict( - type='OCRDataset', - data_root=synthtext_textrecog_data_root, - data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'), - ann_file='alphanumeric_train_labels.json', - test_mode=False, + ann_file='textrecog_train.json', pipeline=None) synthtext_sub_textrecog_train = dict( type='OCRDataset', data_root=synthtext_textrecog_data_root, - data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'), - ann_file='subset_train_labels.json', - test_mode=False, + ann_file='subset_textrecog_train.json', + pipeline=None) + 
+synthtext_an_textrecog_train = dict( + type='OCRDataset', + data_root=synthtext_textrecog_data_root, + ann_file='alphanumeric_textrecog_train.json', pipeline=None) diff --git a/dataset_zoo/synthtext/sample_anno.md b/dataset_zoo/synthtext/sample_anno.md new file mode 100644 index 00000000..90468d30 --- /dev/null +++ b/dataset_zoo/synthtext/sample_anno.md @@ -0,0 +1,124 @@ +**Text Detection/Recognition/Spotting** + +```json +{ + "imnames": [['8/ballet_106_0.jpg', ...]], + "wordBB": [[[420.58957 418.85016 448.08478 410.3094 117.745026 + 322.30963 322.6857 159.09138 154.27284 260.14597 + 431.9315 427.52274 296.86508 99.56819 108.96211 ] + [512.3321 431.88342 519.4515 499.81183 179.0544 + 377.97382 376.4993 203.64464 193.77492 313.61514 + 487.58023 484.64633 365.83176 142.49403 144.90457 ] + [511.92203 428.7077 518.7375 499.0373 172.1684 + 378.35858 377.2078 203.3191 193.0739 319.69186 + 485.6758 482.571 365.76303 142.31898 144.43858 ] + [420.1795 415.67444 447.3708 409.53485 110.859024 + 322.6944 323.3942 158.76585 153.57182 266.2227 + 430.02707 425.44742 296.79636 99.39314 108.49613 ]] + + [[ 21.06382 46.19922 47.570374 73.95366 197.17792 + 9.993624 48.437763 9.064571 49.659035 208.57095 + 118.41646 162.82489 29.548729 5.800581 28.812992 ] + [ 23.069519 48.254295 50.130234 77.18146 208.71487 + 8.999153 46.69632 9.698633 50.869553 203.25742 + 122.64043 168.38647 29.660484 6.2558594 29.602367 ] + [ 41.827087 68.39458 70.03627 98.65903 245.30832 + 30.534437 68.589294 32.57161 73.74529 264.40634 + 147.7303 189.70224 72.08 22.759935 50.81941 ] + [ 39.82139 66.3395 67.47641 95.43123 233.77136 + 31.528908 70.33074 31.937548 72.534775 269.71988 + 143.50633 184.14066 71.96825 22.304657 50.030033 ]], ...], + "charBB": [[[423.16126397 439.60847343 450.66887979 466.31976402 479.76190495 + 504.59927448 418.80489444 450.13965942 464.16775197 480.46891089 + 502.46437709 413.02373632 433.01396211 446.7222192 470.28467827 + 482.51674486 116.52285438 139.51408587 150.7448586 
162.03366629 + 322.84717946 333.54881536 343.28386485 363.07416389 323.48968759 + 337.98503283 356.66355903 160.48517048 174.1707753 189.64454066 + 155.7637383 167.45490471 179.63644201 262.2183876 271.75848874 + 284.05396524 298.26103738 432.8464733 449.15387392 468.07231897 + 428.11482147 445.61538159 469.24565878 296.86441324 323.6603118 + 344.09880401 101.14677814 110.45423597 120.54555495 131.18342618 + 132.20545124 110.01673682 120.83144568 131.35885673] + [438.2997574 452.61288403 466.31976402 482.22585715 498.3934528 + 512.20555863 431.88338084 466.11639619 481.73414937 499.62012025 + 519.36789779 432.51717267 449.23571387 465.73425964 484.45139112 + 499.59056304 140.27413679 149.59811175 160.13352083 169.59504507 + 333.55849014 344.33923741 361.08275796 378.09844418 339.92898685 + 355.57692063 376.51230484 174.1707753 189.07871028 203.64462646 + 165.22739457 181.27572412 193.60260894 270.99557614 283.13281739 + 298.75499435 313.61511672 447.1421735 470.27065563 487.02126631 + 446.97485257 468.98979567 484.64633864 317.88691577 341.16094163 + 365.8300006 111.15280603 120.54555495 130.72086821 135.27663717 + 142.4726875 120.1331955 133.07976304 144.75919258] + [435.54895424 449.95797159 464.5848793 480.68235876 497.04793842 + 511.1101386 428.95660757 463.61882066 480.14247127 498.2535215 + 518.03243928 429.36600266 447.19056345 463.89483785 482.21016814 + 498.18529977 142.63162835 152.55587851 162.80539142 172.21885945 + 333.35620309 344.09880401 360.86201193 377.82379299 339.7646859 + 355.37508239 376.1110999 172.46032372 187.37816388 201.39094518 + 163.04321987 178.99078221 191.89681939 275.3073355 286.08373072 + 301.85539131 318.57227103 444.54207279 467.53925436 485.27070558 + 444.57367155 466.90671029 482.56302723 317.62908407 340.9131681 + 365.44465854 109.40501176 119.4999228 129.67892444 134.35253232 + 140.97421069 118.61779828 131.34019115 143.25688164] + [420.17946701 436.74150236 448.74896556 464.5848793 478.18853922 + 503.4152019 415.67442461 
447.3707845 462.35927516 478.8614766 + 500.86810735 409.54560397 430.77026495 444.64606264 467.79077782 + 480.89051912 119.14629674 142.63162835 153.56593297 164.78799774 + 322.69436747 333.35620309 343.11884239 362.84714115 323.37931952 + 337.83763574 356.35573621 158.76583616 172.46032372 187.37816388 + 153.57183805 165.15781218 177.92125239 266.22269514 274.45156305 + 286.82608962 302.69695881 430.02705241 446.01814255 466.05208347 + 425.44741792 443.19481667 466.90671029 296.79634428 323.49707084 + 343.82488703 99.39315359 109.40501176 119.4999228 130.25798537 + 130.70149005 108.49612777 119.08444238 129.84935461]] + + [[ 22.26958901 21.60559248 27.0241972 27.25747678 27.45783459 + 28.73896576 47.91255579 47.80732383 53.77711568 54.24219042 + 52.00169325 74.79043429 80.45929285 81.04748707 76.11658669 + 82.58335942 203.67278213 201.2743445 205.59358622 205.51198143 + 10.06536976 10.82312635 16.77203865 16.31842372 54.80444433 + 54.66492 47.33822371 15.08534083 15.18716407 9.62607092 + 51.06813224 50.18928243 56.16019366 220.78902143 236.08062638 + 231.69267533 209.73652786 124.25352842 119.99631725 128.73732717 + 165.78411123 167.31764153 167.05531699 29.97351822 31.5116502 + 31.14650552 5.88513488 12.51324147 12.57920537 8.21515307 + 8.21998849 35.66412031 29.17945741 36.00660903] + [ 22.46075572 21.76391911 27.25747678 27.49456029 27.73554156 + 28.85582217 48.25428361 48.21714995 54.27828788 54.78857757 + 52.4595556 75.57743634 81.15533616 81.86325615 76.681392 + 83.31596322 210.04771309 203.83983042 208.00417391 207.41791524 + 9.79265706 10.55231862 16.36406888 15.97405105 54.64620856 + 54.49559004 47.09756263 15.18716407 15.29808166 9.69862498 + 51.27597632 50.48652154 56.49239954 216.92183074 232.02141018 + 226.44624213 203.25738931 125.19349641 121.32658508 130.00428964 + 167.43676857 169.36588297 168.38645076 29.58279603 31.19899202 + 30.75826599 5.92344996 12.57920537 12.64571832 8.23451892 + 8.26856497 35.82646468 29.342662 36.22165159] + [ 40.15739982 
40.47241401 40.79219178 41.14411963 41.50190876 + 41.80934074 66.81590976 68.05921213 68.6519006 69.30152766 + 70.01097963 96.14641662 96.04484417 96.89110144 97.81897661 + 98.62829468 237.26055111 240.35280825 243.54641271 245.04022528 + 31.33842788 31.14650552 30.84702178 30.54399042 69.80098672 + 68.7212013 68.62479627 32.13243303 32.34474067 32.54416771 + 72.82501686 73.31372392 73.70922459 267.74318222 265.39839711 + 259.52741156 253.14023308 144.60810334 145.23371653 147.69958337 + 186.00278322 188.17713786 189.70144388 71.89351759 53.62266986 + 54.40060855 22.41084398 22.51791234 22.62587258 17.11356079 + 22.74567232 50.25232032 46.05692507 50.79345235] + [ 39.82138755 40.18347166 40.44598236 40.79219178 41.08959901 + 41.64111176 66.33948982 67.47640971 68.01403337 68.60595247 + 69.3953105 95.13188979 95.21297344 95.91593691 97.08847413 + 97.75212171 229.94285119 237.26055111 240.66752705 242.74145162 + 31.52890731 31.33842788 31.16401306 30.81155638 69.87135926 + 68.80273568 68.71664209 31.93753588 32.13243303 32.34474067 + 72.53476992 72.88981775 73.28094858 269.71986636 267.92938572 + 262.93698624 256.88902439 143.50635029 143.61251781 146.24080653 + 184.14064261 185.86853729 188.17713786 71.96823746 53.79651809 + 54.60870874 22.30465649 22.41084398 22.51791234 17.07939535 + 22.63671808 50.03002471 45.81009198 50.49899163]], ...], + "txt": [['Lines:\nI lost\nKevin ' 'will ' 'line\nand ' + 'and\nthe ' '(and ' 'the\nout ' + 'you ' "don't\n pkg "], ...] 
+} +``` diff --git a/dataset_zoo/synthtext/textdet.py b/dataset_zoo/synthtext/textdet.py new file mode 100644 index 00000000..4f35f5bc --- /dev/null +++ b/dataset_zoo/synthtext/textdet.py @@ -0,0 +1,30 @@ +data_root = 'data/synthtext' +cache_path = 'data/cache' + +train_preparer = dict( + obtainer=dict( + type='NaiveDataObtainer', + cache_path=cache_path, + files=[ + dict( + url='magnet:?xt=urn:btih:2dba9518166cbd141534cbf381aa3e99a08' + '7e83c&tr=https%3A%2F%2Facademictorrents.com%2Fannounce.php&t' + 'r=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2F' + 'tracker.opentrackr.org%3A1337%2Fannounce', + save_name='SynthText.zip', + md5='8ae0309c80ff882f9d6ba5ea62cdb556', + split=['train'], + content=['image', 'annotation'], + mapping=[['SynthText/SynthText/*', 'textdet_imgs/train/'], + ['textdet_imgs/train/gt.mat', 'annotations/gt.mat']]), + ]), + gatherer=dict(type='MonoGatherer', ann_name='gt.mat'), + parser=dict(type='SynthTextAnnParser'), + packer=dict(type='TextDetPacker'), + dumper=dict(type='JsonDumper'), +) + +delete = ['SynthText', 'annotations'] + +config_generator = dict( + type='TextDetConfigGenerator', data_root=data_root, test_anns=None) diff --git a/dataset_zoo/synthtext/textrecog.py b/dataset_zoo/synthtext/textrecog.py new file mode 100644 index 00000000..2ab2bdd1 --- /dev/null +++ b/dataset_zoo/synthtext/textrecog.py @@ -0,0 +1,30 @@ +_base_ = ['textdet.py'] + +_base_.train_preparer.obtainer.files.append( + dict( + url='https://download.openmmlab.com/mmocr/data/1.x/recog/' + 'SynthText/subset_textrecog_train.json', + save_name='subset_textrecog_train.json', + md5='151c4edd1cc240362046d3a6f8f4b4c6', + split=['train'], + content=['annotation'])) +_base_.train_preparer.obtainer.files.append( + dict( + url='https://download.openmmlab.com/mmocr/data/1.x/recog/' + 'SynthText/alphanumeric_textrecog_train.json', + save_name='alphanumeric_textrecog_train.json', + md5='89b80163435794ca117a124d081d68a9', + split=['train'], + content=['annotation'])) 
+_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train' +_base_.train_preparer.packer.type = 'TextRecogCropPacker' + +config_generator = dict( + type='TextRecogConfigGenerator', + train_anns=[ + dict(ann_file='textrecog_train.json', dataset_postfix=''), + dict(ann_file='subset_textrecog_train.json', dataset_postfix='sub'), + dict( + ann_file='alphanumeric_textrecog_train.json', + dataset_postfix='an'), + ]) diff --git a/docs/en/user_guides/data_prepare/recog.md b/docs/en/user_guides/data_prepare/recog.md index cc086a91..47a3dd04 100644 --- a/docs/en/user_guides/data_prepare/recog.md +++ b/docs/en/user_guides/data_prepare/recog.md @@ -6,28 +6,27 @@ This page is a manual preparation guide for datasets not yet supported by [Datas ## Overview -| Dataset | images | annotation file | annotation file | -| :-------------------: | :---------------------------------------------------: | :-------------------------------------------------------------: | :-------------------------------------------------------------: | -| | | training | test | -| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - | -| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - | -| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - | -| SynthText (Synth800k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | [alphanumeric_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/alphanumeric_train_labels.json) \|[subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/train_labels.json) | - | -| SynthAdd | 
[SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - | -| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | -| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - | -| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - | -| LSVT | [homepage](https://rrc.cvc.uab.es/?ch=16) | - | - | -| IMGUR | [homepage](https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset) | - | - | -| KAIST | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/KAIST_Scene_Text_Database) | - | - | -| MTWI | [homepage](https://tianchi.aliyun.com/competition/entrance/231685/information?lang=en-us) | - | - | -| ReCTS | [homepage](https://rrc.cvc.uab.es/?ch=12) | - | - | -| IIIT-ILST | [homepage](http://cvit.iiit.ac.in/research/projects/cvit-projects/iiit-ilst) | - | - | -| VinText | [homepage](https://github.com/VinAIResearch/dict-guided) | - | - | -| BID | [homepage](https://github.com/ricardobnjunior/Brazilian-Identity-Document-Dataset) | - | - | -| RCTW | [homepage](https://rctw.vlrlab.net/index.html) | - | - | -| HierText | [homepage](https://github.com/google-research-datasets/hiertext) | - | - | -| ArT | [homepage](https://rrc.cvc.uab.es/?ch=14) | - | - | +| Dataset | images | annotation file | annotation file | +| :--------------: | :-----------------------------------------------------: | :--------------------------------------------------------------: | :---------------------------------------------------------------: | +| | | training | test | +| coco_text | 
[homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - | +| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - | +| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - | +| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - | +| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | +| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - | +| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - | +| LSVT | [homepage](https://rrc.cvc.uab.es/?ch=16) | - | - | +| IMGUR | [homepage](https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset) | - | - | +| KAIST | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/KAIST_Scene_Text_Database) | - | - | +| MTWI | [homepage](https://tianchi.aliyun.com/competition/entrance/231685/information?lang=en-us) | - | - | +| ReCTS | [homepage](https://rrc.cvc.uab.es/?ch=12) | - | - | +| IIIT-ILST | [homepage](http://cvit.iiit.ac.in/research/projects/cvit-projects/iiit-ilst) | - | - | +| VinText | [homepage](https://github.com/VinAIResearch/dict-guided) | - | - | +| BID | [homepage](https://github.com/ricardobnjunior/Brazilian-Identity-Document-Dataset) | - | - | +| RCTW | [homepage](https://rctw.vlrlab.net/index.html) | - | - | +| HierText | 
[homepage](https://github.com/google-research-datasets/hiertext) | - | - | +| ArT | [homepage](https://rrc.cvc.uab.es/?ch=14) | - | - | (\*) Since the official homepage is unavailable now, we provide an alternative for quick reference. However, we do not guarantee the correctness of the dataset. @@ -149,52 +148,6 @@ Please make sure you're using the right annotation to train the model by checkin │ └── mnt ``` -## SynthText (Synth800k) - -- Step1: Download `SynthText.zip` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) - -- Step2: According to your actual needs, download the most appropriate one from the following options: [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/train_labels.json) (7,266,686 annotations), [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/subset_train_labels.json) (2,400,000 randomly sampled annotations) and [alphanumeric_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/alphanumeric_train_labels.json) (7,239,272 annotations with alphanumeric characters only). - -```{warning} -Please make sure you're using the right annotation to train the model by checking its dataset specs in Model Zoo. -``` - -- Step3: - - ```bash - mkdir SynthText && cd SynthText - mv /path/to/SynthText.zip . - unzip SynthText.zip - mv SynthText synthtext - - mv /path/to/subset_train_labels.json . - mv /path/to/train_labels.json . - mv /path/to/alphanumeric_train_labels.json . 
- - # create soft link - cd /path/to/mmocr/data/recog - ln -s /path/to/SynthText SynthText - ``` - -- Step4: Generate cropped images and labels: - - ```bash - cd /path/to/mmocr - - python tools/dataset_converters/textrecog/synthtext_converter.py data/recog/SynthText/gt.mat data/recog/SynthText/ data/recog/SynthText/synthtext/SynthText_patch_horizontal --n_proc 8 - ``` - -- After running the above codes, the directory structure - should be as follows: - - ```text - ├── SynthText - │ ├── alphanumeric_train_labels.json - │ ├── subset_train_labels.json - │ ├── train_labels.json - │ └── synthtext - ``` - ## SynthAdd - Step1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x)) diff --git a/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py b/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py index 664ca681..e4ed1f9e 100644 --- a/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py +++ b/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py @@ -88,8 +88,18 @@ class NaiveDataObtainer: ' Please manually download the required files' ' following the guides.') - print(f'Start to download {osp.basename(dst_path)}...') - print('If you stuck here for a long time, please check your network.') + if url.startswith('magnet'): + raise NotImplementedError('Please use any BitTorrent client to ' + 'download the following magnet link to ' + f'{osp.abspath(dst_path)} and ' + f'try again.\nLink: {url}') + + print('Downloading...') + print(f'URL: {url}') + print(f'Destination: {osp.abspath(dst_path)}') + print('If you stuck here for a long time, please check your network, ' + 'or manually download the file to the destination path and ' + 'run the script again.') request.urlretrieve(url, dst_path, progress) print('') diff --git a/mmocr/datasets/preparers/parsers/__init__.py b/mmocr/datasets/preparers/parsers/__init__.py index 8c79ca99..58d6d9bd 100644 --- a/mmocr/datasets/preparers/parsers/__init__.py +++ 
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List, Optional, Tuple, Union

import numpy as np
from mmengine import track_parallel_progress
from scipy.io import loadmat

from mmocr.utils import is_type_list
from ..data_preparer import DATA_PARSERS
from .base import BaseParser


@DATA_PARSERS.register_module()
class SynthTextAnnParser(BaseParser):
    """SynthText Text Detection Annotation Parser.

    Parses the official SynthText ``gt.mat`` file, whose ``wordBB``,
    ``charBB`` and ``txt`` fields hold word boxes, character boxes and
    transcriptions for every image.

    Args:
        split (str): The split of the dataset. It is usually set automatically
            and users do not need to set it manually in config file in most
            cases.
        nproc (int): Number of processes to process the data. Defaults to 1.
            It is usually set automatically and users do not need to set it
            manually in config file in most cases.
        separator (str): The separator between each element in a line.
            Defaults to ','.
        ignore (str): The text to be ignored. Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8'.
        remove_strs (List[str], Optional): Used to remove redundant strings in
            the transcription. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. Defaults to None.

    Note:
        ``separator``, ``format``, ``encoding``, ``remove_strs``, ``ignore``
        and ``mode`` are accepted for interface compatibility with other
        txt-based parsers but are not consulted while parsing ``gt.mat``
        (the .mat file is already structured) — TODO(review): confirm
        whether they can be dropped from the signature.
    """

    def __init__(self,
                 split: str,
                 nproc: int,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8',
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(split=split, nproc=nproc)

    def _trace_boundary(self, char_boxes: List[np.ndarray]) -> np.ndarray:
        """Trace the boundary point of text.

        Walks the char boxes' top edges left-to-right, then the bottom
        edges right-to-left, producing a closed polygon around the word.

        Args:
            char_boxes (list[ndarray]): The char boxes for one text. Each
                element is 4x2 ndarray.

        Returns:
            ndarray: The boundary point sets with size nx2.
        """
        assert is_type_list(char_boxes, np.ndarray)

        # from top left to top right
        p_top = [box[0:2] for box in char_boxes]
        # from bottom right to bottom left
        p_bottom = [
            char_boxes[idx][[2, 3], :]
            for idx in range(len(char_boxes) - 1, -1, -1)
        ]

        p = p_top + p_bottom

        boundary = np.concatenate(p).astype(int)

        return boundary

    def _match_bbox_char_str(self, bboxes: np.ndarray, char_bboxes: np.ndarray,
                             strs: np.ndarray
                             ) -> Tuple[List[np.ndarray], List[str]]:
        """Match the bboxes, char bboxes, and strs.

        Args:
            bboxes (ndarray): The text boxes of size (2, 4, num_box).
            char_bboxes (ndarray): The char boxes of size (2, 4, num_char_box).
            strs (ndarray): The string of size (num_strs,)

        Returns:
            Tuple(List[ndarray], List[str]): Polygon & word list.
        """
        assert isinstance(bboxes, np.ndarray)
        assert isinstance(char_bboxes, np.ndarray)
        assert isinstance(strs, np.ndarray)
        char_bboxes = char_bboxes.astype(np.int32)

        # A single-word image stores char boxes as (2, 4); normalize to
        # (2, 4, 1) so the transpose below always yields (num, 4, 2).
        if len(char_bboxes.shape) == 2:
            char_bboxes = np.expand_dims(char_bboxes, axis=2)
        char_bboxes = np.transpose(char_bboxes, (2, 1, 0))
        num_boxes = 1 if len(bboxes.shape) == 2 else bboxes.shape[-1]

        poly_charbox_list = [[] for _ in range(num_boxes)]

        # ``txt`` lines may contain several whitespace-separated words;
        # split them so each word aligns with one word box.
        words = []
        for line in strs:
            words += line.split()
        words_len = [len(w) for w in words]
        words_end_inx = np.cumsum(words_len)
        start_inx = 0
        # Assign each word its run of consecutive char boxes.
        for word_inx, end_inx in enumerate(words_end_inx):
            for char_inx in range(start_inx, end_inx):
                poly_charbox_list[word_inx].append(char_bboxes[char_inx])
            start_inx = end_inx

        for box_inx in range(num_boxes):
            assert len(poly_charbox_list[box_inx]) > 0

        poly_boundary_list = []
        for item in poly_charbox_list:
            # Fallback empty polygon; unreachable in practice since the
            # assert above guarantees every word has at least one char box.
            boundary = np.ndarray((0, 2))
            if len(item) > 0:
                boundary = self._trace_boundary(item)
            poly_boundary_list.append(boundary)

        return poly_boundary_list, words

    def parse_files(self, img_paths: Union[List[str], str],
                    ann_paths: Union[List[str], str]) -> List[Tuple]:
        """Convert annotations to MMOCR format.

        Args:
            img_paths (str or list[str]): the list of image paths or the
                directory of the images.
            ann_paths (str or list[str]): the list of annotation paths or the
                path of the annotation file which contains all the
                annotations.

        Returns:
            List[Tuple]: A list of a tuple of (image_path, instances).

            - img_path (str): The path of image file, which can be read
              directly by opencv.
            - instance: instance is a list of dict containing parsed
              annotations, which should contain the following keys:

              - 'poly' or 'box' (textdet or textspotting)
              - 'text' (textspotting or textrecog)
              - 'ignore' (all task)
        """
        # SynthText ships a single gt.mat holding every annotation.
        assert isinstance(ann_paths, str)
        gt = loadmat(ann_paths)
        self.img_dir = img_paths
        samples = track_parallel_progress(
            self.parse_file,
            list(
                zip(gt['imnames'][0], gt['wordBB'][0], gt['charBB'][0],
                    gt['txt'][0])),
            nproc=self.nproc)
        return samples

    def parse_file(self, annotation: Tuple) -> Tuple:
        """Parse annotations for one image into (img_path, instances)."""
        img_file, wordBB, charBB, txt = annotation
        polys_list, word_list = self._match_bbox_char_str(wordBB, charBB, txt)

        instances = list()
        for poly, word in zip(polys_list, word_list):
            instances.append(
                dict(poly=poly.flatten().tolist(), text=word, ignore=False))
        return osp.join(self.img_dir, img_file[0]), instances