mirror of https://github.com/open-mmlab/mmocr.git
[Dataset Preparer] SynthText (#1779)
* [Dataset] Support Synthtext * update * update * finalize setting * fix * textrec * update * add fake magnet obtainer * update rec * update * sample_annpull/1786/head
parent
7ef34c4407
commit
c6580a48c1
|
@ -1,17 +1,8 @@
|
|||
synthtext_textdet_data_root = 'data/det/synthtext'
|
||||
synthtext_textdet_data_root = 'data/synthtext'
|
||||
|
||||
synthtext_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textdet_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
synthtext_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textdet_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,25 +1,19 @@
|
|||
synthtext_textrecog_data_root = 'data/rec/SynthText/'
|
||||
synthtext_textrecog_data_root = 'data/synthtext'
|
||||
|
||||
synthtext_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
pipeline=None)
|
||||
|
||||
synthtext_an_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='alphanumeric_train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
synthtext_sub_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='subset_train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='subset_textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
synthtext_an_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
ann_file='alphanumeric_textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
**Text Detection/Recognition/Spotting**
|
||||
|
||||
```json
|
||||
{
|
||||
"imnames": [['8/ballet_106_0.jpg', ...]],
|
||||
"wordBB": [[[420.58957 418.85016 448.08478 410.3094 117.745026
|
||||
322.30963 322.6857 159.09138 154.27284 260.14597
|
||||
431.9315 427.52274 296.86508 99.56819 108.96211 ]
|
||||
[512.3321 431.88342 519.4515 499.81183 179.0544
|
||||
377.97382 376.4993 203.64464 193.77492 313.61514
|
||||
487.58023 484.64633 365.83176 142.49403 144.90457 ]
|
||||
[511.92203 428.7077 518.7375 499.0373 172.1684
|
||||
378.35858 377.2078 203.3191 193.0739 319.69186
|
||||
485.6758 482.571 365.76303 142.31898 144.43858 ]
|
||||
[420.1795 415.67444 447.3708 409.53485 110.859024
|
||||
322.6944 323.3942 158.76585 153.57182 266.2227
|
||||
430.02707 425.44742 296.79636 99.39314 108.49613 ]]
|
||||
|
||||
[[ 21.06382 46.19922 47.570374 73.95366 197.17792
|
||||
9.993624 48.437763 9.064571 49.659035 208.57095
|
||||
118.41646 162.82489 29.548729 5.800581 28.812992 ]
|
||||
[ 23.069519 48.254295 50.130234 77.18146 208.71487
|
||||
8.999153 46.69632 9.698633 50.869553 203.25742
|
||||
122.64043 168.38647 29.660484 6.2558594 29.602367 ]
|
||||
[ 41.827087 68.39458 70.03627 98.65903 245.30832
|
||||
30.534437 68.589294 32.57161 73.74529 264.40634
|
||||
147.7303 189.70224 72.08 22.759935 50.81941 ]
|
||||
[ 39.82139 66.3395 67.47641 95.43123 233.77136
|
||||
31.528908 70.33074 31.937548 72.534775 269.71988
|
||||
143.50633 184.14066 71.96825 22.304657 50.030033 ]], ...],
|
||||
"charBB": [[[423.16126397 439.60847343 450.66887979 466.31976402 479.76190495
|
||||
504.59927448 418.80489444 450.13965942 464.16775197 480.46891089
|
||||
502.46437709 413.02373632 433.01396211 446.7222192 470.28467827
|
||||
482.51674486 116.52285438 139.51408587 150.7448586 162.03366629
|
||||
322.84717946 333.54881536 343.28386485 363.07416389 323.48968759
|
||||
337.98503283 356.66355903 160.48517048 174.1707753 189.64454066
|
||||
155.7637383 167.45490471 179.63644201 262.2183876 271.75848874
|
||||
284.05396524 298.26103738 432.8464733 449.15387392 468.07231897
|
||||
428.11482147 445.61538159 469.24565878 296.86441324 323.6603118
|
||||
344.09880401 101.14677814 110.45423597 120.54555495 131.18342618
|
||||
132.20545124 110.01673682 120.83144568 131.35885673]
|
||||
[438.2997574 452.61288403 466.31976402 482.22585715 498.3934528
|
||||
512.20555863 431.88338084 466.11639619 481.73414937 499.62012025
|
||||
519.36789779 432.51717267 449.23571387 465.73425964 484.45139112
|
||||
499.59056304 140.27413679 149.59811175 160.13352083 169.59504507
|
||||
333.55849014 344.33923741 361.08275796 378.09844418 339.92898685
|
||||
355.57692063 376.51230484 174.1707753 189.07871028 203.64462646
|
||||
165.22739457 181.27572412 193.60260894 270.99557614 283.13281739
|
||||
298.75499435 313.61511672 447.1421735 470.27065563 487.02126631
|
||||
446.97485257 468.98979567 484.64633864 317.88691577 341.16094163
|
||||
365.8300006 111.15280603 120.54555495 130.72086821 135.27663717
|
||||
142.4726875 120.1331955 133.07976304 144.75919258]
|
||||
[435.54895424 449.95797159 464.5848793 480.68235876 497.04793842
|
||||
511.1101386 428.95660757 463.61882066 480.14247127 498.2535215
|
||||
518.03243928 429.36600266 447.19056345 463.89483785 482.21016814
|
||||
498.18529977 142.63162835 152.55587851 162.80539142 172.21885945
|
||||
333.35620309 344.09880401 360.86201193 377.82379299 339.7646859
|
||||
355.37508239 376.1110999 172.46032372 187.37816388 201.39094518
|
||||
163.04321987 178.99078221 191.89681939 275.3073355 286.08373072
|
||||
301.85539131 318.57227103 444.54207279 467.53925436 485.27070558
|
||||
444.57367155 466.90671029 482.56302723 317.62908407 340.9131681
|
||||
365.44465854 109.40501176 119.4999228 129.67892444 134.35253232
|
||||
140.97421069 118.61779828 131.34019115 143.25688164]
|
||||
[420.17946701 436.74150236 448.74896556 464.5848793 478.18853922
|
||||
503.4152019 415.67442461 447.3707845 462.35927516 478.8614766
|
||||
500.86810735 409.54560397 430.77026495 444.64606264 467.79077782
|
||||
480.89051912 119.14629674 142.63162835 153.56593297 164.78799774
|
||||
322.69436747 333.35620309 343.11884239 362.84714115 323.37931952
|
||||
337.83763574 356.35573621 158.76583616 172.46032372 187.37816388
|
||||
153.57183805 165.15781218 177.92125239 266.22269514 274.45156305
|
||||
286.82608962 302.69695881 430.02705241 446.01814255 466.05208347
|
||||
425.44741792 443.19481667 466.90671029 296.79634428 323.49707084
|
||||
343.82488703 99.39315359 109.40501176 119.4999228 130.25798537
|
||||
130.70149005 108.49612777 119.08444238 129.84935461]]
|
||||
|
||||
[[ 22.26958901 21.60559248 27.0241972 27.25747678 27.45783459
|
||||
28.73896576 47.91255579 47.80732383 53.77711568 54.24219042
|
||||
52.00169325 74.79043429 80.45929285 81.04748707 76.11658669
|
||||
82.58335942 203.67278213 201.2743445 205.59358622 205.51198143
|
||||
10.06536976 10.82312635 16.77203865 16.31842372 54.80444433
|
||||
54.66492 47.33822371 15.08534083 15.18716407 9.62607092
|
||||
51.06813224 50.18928243 56.16019366 220.78902143 236.08062638
|
||||
231.69267533 209.73652786 124.25352842 119.99631725 128.73732717
|
||||
165.78411123 167.31764153 167.05531699 29.97351822 31.5116502
|
||||
31.14650552 5.88513488 12.51324147 12.57920537 8.21515307
|
||||
8.21998849 35.66412031 29.17945741 36.00660903]
|
||||
[ 22.46075572 21.76391911 27.25747678 27.49456029 27.73554156
|
||||
28.85582217 48.25428361 48.21714995 54.27828788 54.78857757
|
||||
52.4595556 75.57743634 81.15533616 81.86325615 76.681392
|
||||
83.31596322 210.04771309 203.83983042 208.00417391 207.41791524
|
||||
9.79265706 10.55231862 16.36406888 15.97405105 54.64620856
|
||||
54.49559004 47.09756263 15.18716407 15.29808166 9.69862498
|
||||
51.27597632 50.48652154 56.49239954 216.92183074 232.02141018
|
||||
226.44624213 203.25738931 125.19349641 121.32658508 130.00428964
|
||||
167.43676857 169.36588297 168.38645076 29.58279603 31.19899202
|
||||
30.75826599 5.92344996 12.57920537 12.64571832 8.23451892
|
||||
8.26856497 35.82646468 29.342662 36.22165159]
|
||||
[ 40.15739982 40.47241401 40.79219178 41.14411963 41.50190876
|
||||
41.80934074 66.81590976 68.05921213 68.6519006 69.30152766
|
||||
70.01097963 96.14641662 96.04484417 96.89110144 97.81897661
|
||||
98.62829468 237.26055111 240.35280825 243.54641271 245.04022528
|
||||
31.33842788 31.14650552 30.84702178 30.54399042 69.80098672
|
||||
68.7212013 68.62479627 32.13243303 32.34474067 32.54416771
|
||||
72.82501686 73.31372392 73.70922459 267.74318222 265.39839711
|
||||
259.52741156 253.14023308 144.60810334 145.23371653 147.69958337
|
||||
186.00278322 188.17713786 189.70144388 71.89351759 53.62266986
|
||||
54.40060855 22.41084398 22.51791234 22.62587258 17.11356079
|
||||
22.74567232 50.25232032 46.05692507 50.79345235]
|
||||
[ 39.82138755 40.18347166 40.44598236 40.79219178 41.08959901
|
||||
41.64111176 66.33948982 67.47640971 68.01403337 68.60595247
|
||||
69.3953105 95.13188979 95.21297344 95.91593691 97.08847413
|
||||
97.75212171 229.94285119 237.26055111 240.66752705 242.74145162
|
||||
31.52890731 31.33842788 31.16401306 30.81155638 69.87135926
|
||||
68.80273568 68.71664209 31.93753588 32.13243303 32.34474067
|
||||
72.53476992 72.88981775 73.28094858 269.71986636 267.92938572
|
||||
262.93698624 256.88902439 143.50635029 143.61251781 146.24080653
|
||||
184.14064261 185.86853729 188.17713786 71.96823746 53.79651809
|
||||
54.60870874 22.30465649 22.41084398 22.51791234 17.07939535
|
||||
22.63671808 50.03002471 45.81009198 50.49899163]], ...],
|
||||
"txt": [['Lines:\nI lost\nKevin ' 'will ' 'line\nand '
|
||||
'and\nthe ' '(and ' 'the\nout '
|
||||
'you ' "don't\n pkg "], ...]
|
||||
}
|
||||
```
|
|
@ -0,0 +1,30 @@
|
|||
data_root = 'data/synthtext'
|
||||
cache_path = 'data/cache'
|
||||
|
||||
train_preparer = dict(
|
||||
obtainer=dict(
|
||||
type='NaiveDataObtainer',
|
||||
cache_path=cache_path,
|
||||
files=[
|
||||
dict(
|
||||
url='magnet:?xt=urn:btih:2dba9518166cbd141534cbf381aa3e99a08'
|
||||
'7e83c&tr=https%3A%2F%2Facademictorrents.com%2Fannounce.php&t'
|
||||
'r=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2F'
|
||||
'tracker.opentrackr.org%3A1337%2Fannounce',
|
||||
save_name='SynthText.zip',
|
||||
md5='8ae0309c80ff882f9d6ba5ea62cdb556',
|
||||
split=['train'],
|
||||
content=['image', 'annotation'],
|
||||
mapping=[['SynthText/SynthText/*', 'textdet_imgs/train/'],
|
||||
['textdet_imgs/train/gt.mat', 'annotations/gt.mat']]),
|
||||
]),
|
||||
gatherer=dict(type='MonoGatherer', ann_name='gt.mat'),
|
||||
parser=dict(type='SynthTextAnnParser'),
|
||||
packer=dict(type='TextDetPacker'),
|
||||
dumper=dict(type='JsonDumper'),
|
||||
)
|
||||
|
||||
delete = ['SynthText', 'annotations']
|
||||
|
||||
config_generator = dict(
|
||||
type='TextDetConfigGenerator', data_root=data_root, test_anns=None)
|
|
@ -0,0 +1,30 @@
|
|||
_base_ = ['textdet.py']
|
||||
|
||||
_base_.train_preparer.obtainer.files.append(
|
||||
dict(
|
||||
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
|
||||
'SynthText/subset_textrecog_train.json',
|
||||
save_name='subset_textrecog_train.json',
|
||||
md5='151c4edd1cc240362046d3a6f8f4b4c6',
|
||||
split=['train'],
|
||||
content=['annotation']))
|
||||
_base_.train_preparer.obtainer.files.append(
|
||||
dict(
|
||||
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
|
||||
'SynthText/alphanumeric_textrecog_train.json',
|
||||
save_name='alphanumeric_textrecog_train.json',
|
||||
md5='89b80163435794ca117a124d081d68a9',
|
||||
split=['train'],
|
||||
content=['annotation']))
|
||||
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
|
||||
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
|
||||
|
||||
config_generator = dict(
|
||||
type='TextRecogConfigGenerator',
|
||||
train_anns=[
|
||||
dict(ann_file='textrecog_train.json', dataset_postfix=''),
|
||||
dict(ann_file='subset_textrecog_train.json', dataset_postfix='sub'),
|
||||
dict(
|
||||
ann_file='alphanumeric_textrecog_train.json',
|
||||
dataset_postfix='an'),
|
||||
])
|
|
@ -6,28 +6,27 @@ This page is a manual preparation guide for datasets not yet supported by [Datas
|
|||
|
||||
## Overview
|
||||
|
||||
| Dataset | images | annotation file | annotation file |
|
||||
| :-------------------: | :---------------------------------------------------: | :-------------------------------------------------------------: | :-------------------------------------------------------------: |
|
||||
| | | training | test |
|
||||
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - |
|
||||
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - |
|
||||
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - |
|
||||
| SynthText (Synth800k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | [alphanumeric_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/alphanumeric_train_labels.json) \|[subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/train_labels.json) | - |
|
||||
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - |
|
||||
| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |
|
||||
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - |
|
||||
| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - |
|
||||
| LSVT | [homepage](https://rrc.cvc.uab.es/?ch=16) | - | - |
|
||||
| IMGUR | [homepage](https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset) | - | - |
|
||||
| KAIST | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/KAIST_Scene_Text_Database) | - | - |
|
||||
| MTWI | [homepage](https://tianchi.aliyun.com/competition/entrance/231685/information?lang=en-us) | - | - |
|
||||
| ReCTS | [homepage](https://rrc.cvc.uab.es/?ch=12) | - | - |
|
||||
| IIIT-ILST | [homepage](http://cvit.iiit.ac.in/research/projects/cvit-projects/iiit-ilst) | - | - |
|
||||
| VinText | [homepage](https://github.com/VinAIResearch/dict-guided) | - | - |
|
||||
| BID | [homepage](https://github.com/ricardobnjunior/Brazilian-Identity-Document-Dataset) | - | - |
|
||||
| RCTW | [homepage](https://rctw.vlrlab.net/index.html) | - | - |
|
||||
| HierText | [homepage](https://github.com/google-research-datasets/hiertext) | - | - |
|
||||
| ArT | [homepage](https://rrc.cvc.uab.es/?ch=14) | - | - |
|
||||
| Dataset | images | annotation file | annotation file |
|
||||
| :--------------: | :-----------------------------------------------------: | :--------------------------------------------------------------: | :---------------------------------------------------------------: |
|
||||
| | | training | test |
|
||||
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - |
|
||||
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - |
|
||||
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - |
|
||||
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - |
|
||||
| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |
|
||||
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - |
|
||||
| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - |
|
||||
| LSVT | [homepage](https://rrc.cvc.uab.es/?ch=16) | - | - |
|
||||
| IMGUR | [homepage](https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset) | - | - |
|
||||
| KAIST | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/KAIST_Scene_Text_Database) | - | - |
|
||||
| MTWI | [homepage](https://tianchi.aliyun.com/competition/entrance/231685/information?lang=en-us) | - | - |
|
||||
| ReCTS | [homepage](https://rrc.cvc.uab.es/?ch=12) | - | - |
|
||||
| IIIT-ILST | [homepage](http://cvit.iiit.ac.in/research/projects/cvit-projects/iiit-ilst) | - | - |
|
||||
| VinText | [homepage](https://github.com/VinAIResearch/dict-guided) | - | - |
|
||||
| BID | [homepage](https://github.com/ricardobnjunior/Brazilian-Identity-Document-Dataset) | - | - |
|
||||
| RCTW | [homepage](https://rctw.vlrlab.net/index.html) | - | - |
|
||||
| HierText | [homepage](https://github.com/google-research-datasets/hiertext) | - | - |
|
||||
| ArT | [homepage](https://rrc.cvc.uab.es/?ch=14) | - | - |
|
||||
|
||||
(\*) Since the official homepage is unavailable now, we provide an alternative for quick reference. However, we do not guarantee the correctness of the dataset.
|
||||
|
||||
|
@ -149,52 +148,6 @@ Please make sure you're using the right annotation to train the model by checkin
|
|||
│ └── mnt
|
||||
```
|
||||
|
||||
## SynthText (Synth800k)
|
||||
|
||||
- Step1: Download `SynthText.zip` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/)
|
||||
|
||||
- Step2: According to your actual needs, download the most appropriate one from the following options: [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/train_labels.json) (7,266,686 annotations), [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/subset_train_labels.json) (2,400,000 randomly sampled annotations) and [alphanumeric_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/alphanumeric_train_labels.json) (7,239,272 annotations with alphanumeric characters only).
|
||||
|
||||
```{warning}
|
||||
Please make sure you're using the right annotation to train the model by checking its dataset specs in Model Zoo.
|
||||
```
|
||||
|
||||
- Step3:
|
||||
|
||||
```bash
|
||||
mkdir SynthText && cd SynthText
|
||||
mv /path/to/SynthText.zip .
|
||||
unzip SynthText.zip
|
||||
mv SynthText synthtext
|
||||
|
||||
mv /path/to/subset_train_labels.json .
|
||||
mv /path/to/train_labels.json .
|
||||
mv /path/to/alphanumeric_train_labels.json .
|
||||
|
||||
# create soft link
|
||||
cd /path/to/mmocr/data/recog
|
||||
ln -s /path/to/SynthText SynthText
|
||||
```
|
||||
|
||||
- Step4: Generate cropped images and labels:
|
||||
|
||||
```bash
|
||||
cd /path/to/mmocr
|
||||
|
||||
python tools/dataset_converters/textrecog/synthtext_converter.py data/recog/SynthText/gt.mat data/recog/SynthText/ data/recog/SynthText/synthtext/SynthText_patch_horizontal --n_proc 8
|
||||
```
|
||||
|
||||
- After running the above codes, the directory structure
|
||||
should be as follows:
|
||||
|
||||
```text
|
||||
├── SynthText
|
||||
│ ├── alphanumeric_train_labels.json
|
||||
│ ├── subset_train_labels.json
|
||||
│ ├── train_labels.json
|
||||
│ └── synthtext
|
||||
```
|
||||
|
||||
## SynthAdd
|
||||
|
||||
- Step1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code: 627x)
|
||||
|
|
|
@ -88,8 +88,18 @@ class NaiveDataObtainer:
|
|||
' Please manually download the required files'
|
||||
' following the guides.')
|
||||
|
||||
print(f'Start to download {osp.basename(dst_path)}...')
|
||||
print('If you stuck here for a long time, please check your network.')
|
||||
if url.startswith('magnet'):
|
||||
raise NotImplementedError('Please use any BitTorrent client to '
|
||||
'download the following magnet link to '
|
||||
f'{osp.abspath(dst_path)} and '
|
||||
f'try again.\nLink: {url}')
|
||||
|
||||
print('Downloading...')
|
||||
print(f'URL: {url}')
|
||||
print(f'Destination: {osp.abspath(dst_path)}')
|
||||
print('If you stuck here for a long time, please check your network, '
|
||||
'or manually download the file to the destination path and '
|
||||
'run the script again.')
|
||||
request.urlretrieve(url, dst_path, progress)
|
||||
print('')
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
|
|||
from .naf_parser import NAFAnnParser
|
||||
from .sroie_parser import SROIETextDetAnnParser
|
||||
from .svt_parser import SVTTextDetAnnParser
|
||||
from .synthtext_parser import SynthTextAnnParser
|
||||
from .totaltext_parser import TotaltextTextDetAnnParser
|
||||
from .wildreceipt_parser import WildreceiptKIEAnnParser
|
||||
|
||||
|
@ -15,5 +16,6 @@ __all__ = [
|
|||
'BaseParser', 'ICDARTxtTextDetAnnParser', 'ICDARTxtTextRecogAnnParser',
|
||||
'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
|
||||
'COCOTextDetAnnParser', 'SVTTextDetAnnParser', 'FUNSDTextDetAnnParser',
|
||||
'SROIETextDetAnnParser', 'NAFAnnParser', 'CTW1500AnnParser'
|
||||
'SROIETextDetAnnParser', 'NAFAnnParser', 'CTW1500AnnParser',
|
||||
'SynthTextAnnParser'
|
||||
]
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from mmengine import track_parallel_progress
|
||||
from scipy.io import loadmat
|
||||
|
||||
from mmocr.utils import is_type_list
|
||||
from ..data_preparer import DATA_PARSERS
|
||||
from .base import BaseParser
|
||||
|
||||
|
||||
@DATA_PARSERS.register_module()
|
||||
class SynthTextAnnParser(BaseParser):
|
||||
"""SynthText Text Detection Annotation Parser.
|
||||
|
||||
Args:
|
||||
split (str): The split of the dataset. It is usually set automatically
|
||||
and users do not need to set it manually in config file in most
|
||||
cases.
|
||||
nproc (int): Number of processes to process the data. Defaults to 1.
|
||||
It is usually set automatically and users do not need to set it
|
||||
manually in config file in most cases.
|
||||
separator (str): The separator between each element in a line. Defaults
|
||||
to ','.
|
||||
ignore (str): The text to be ignored. Defaults to '###'.
|
||||
format (str): The format of the annotation. Defaults to
|
||||
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
|
||||
encoding (str): The encoding of the annotation file. Defaults to
|
||||
            'utf-8'.
|
||||
remove_strs (List[str], Optional): Used to remove redundant strings in
|
||||
the transcription. Defaults to None.
|
||||
mode (str, optional): The mode of the box converter. Supported modes
|
||||
are 'xywh' and 'xyxy'. Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
split: str,
|
||||
nproc: int,
|
||||
separator: str = ',',
|
||||
ignore: str = '###',
|
||||
format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
|
||||
encoding: str = 'utf-8',
|
||||
remove_strs: Optional[List[str]] = None,
|
||||
mode: str = None) -> None:
|
||||
self.sep = separator
|
||||
self.format = format
|
||||
self.encoding = encoding
|
||||
self.ignore = ignore
|
||||
self.mode = mode
|
||||
self.remove_strs = remove_strs
|
||||
super().__init__(split=split, nproc=nproc)
|
||||
|
||||
def _trace_boundary(self, char_boxes: List[np.ndarray]) -> np.ndarray:
|
||||
"""Trace the boundary point of text.
|
||||
|
||||
Args:
|
||||
char_boxes (list[ndarray]): The char boxes for one text. Each
|
||||
element is 4x2 ndarray.
|
||||
|
||||
Returns:
|
||||
ndarray: The boundary point sets with size nx2.
|
||||
"""
|
||||
assert is_type_list(char_boxes, np.ndarray)
|
||||
|
||||
        # from top left to top right
|
||||
p_top = [box[0:2] for box in char_boxes]
|
||||
# from bottom right to bottom left
|
||||
p_bottom = [
|
||||
char_boxes[idx][[2, 3], :]
|
||||
for idx in range(len(char_boxes) - 1, -1, -1)
|
||||
]
|
||||
|
||||
p = p_top + p_bottom
|
||||
|
||||
boundary = np.concatenate(p).astype(int)
|
||||
|
||||
return boundary
|
||||
|
||||
def _match_bbox_char_str(self, bboxes: np.ndarray, char_bboxes: np.ndarray,
|
||||
strs: np.ndarray
|
||||
) -> Tuple[List[np.ndarray], List[str]]:
|
||||
"""Match the bboxes, char bboxes, and strs.
|
||||
|
||||
Args:
|
||||
bboxes (ndarray): The text boxes of size (2, 4, num_box).
|
||||
char_bboxes (ndarray): The char boxes of size (2, 4, num_char_box).
|
||||
strs (ndarray): The string of size (num_strs,)
|
||||
|
||||
Returns:
|
||||
Tuple(List[ndarray], List[str]): Polygon & word list.
|
||||
"""
|
||||
assert isinstance(bboxes, np.ndarray)
|
||||
assert isinstance(char_bboxes, np.ndarray)
|
||||
assert isinstance(strs, np.ndarray)
|
||||
# bboxes = bboxes.astype(np.int32)
|
||||
char_bboxes = char_bboxes.astype(np.int32)
|
||||
|
||||
if len(char_bboxes.shape) == 2:
|
||||
char_bboxes = np.expand_dims(char_bboxes, axis=2)
|
||||
char_bboxes = np.transpose(char_bboxes, (2, 1, 0))
|
||||
num_boxes = 1 if len(bboxes.shape) == 2 else bboxes.shape[-1]
|
||||
|
||||
poly_charbox_list = [[] for _ in range(num_boxes)]
|
||||
|
||||
words = []
|
||||
for line in strs:
|
||||
words += line.split()
|
||||
words_len = [len(w) for w in words]
|
||||
words_end_inx = np.cumsum(words_len)
|
||||
start_inx = 0
|
||||
for word_inx, end_inx in enumerate(words_end_inx):
|
||||
for char_inx in range(start_inx, end_inx):
|
||||
poly_charbox_list[word_inx].append(char_bboxes[char_inx])
|
||||
start_inx = end_inx
|
||||
|
||||
for box_inx in range(num_boxes):
|
||||
assert len(poly_charbox_list[box_inx]) > 0
|
||||
|
||||
poly_boundary_list = []
|
||||
for item in poly_charbox_list:
|
||||
boundary = np.ndarray((0, 2))
|
||||
if len(item) > 0:
|
||||
boundary = self._trace_boundary(item)
|
||||
poly_boundary_list.append(boundary)
|
||||
|
||||
return poly_boundary_list, words
|
||||
|
||||
def parse_files(self, img_paths: Union[List[str], str],
|
||||
ann_paths: Union[List[str], str]) -> List[Tuple]:
|
||||
"""Convert annotations to MMOCR format.
|
||||
|
||||
Args:
|
||||
img_paths (str or list[str]): the list of image paths or the
|
||||
directory of the images.
|
||||
ann_paths (str or list[str]): the list of annotation paths or the
|
||||
path of the annotation file which contains all the annotations.
|
||||
|
||||
Returns:
|
||||
List[Tuple]: A list of a tuple of (image_path, instances).
|
||||
|
||||
- img_path (str): The path of image file, which can be read
|
||||
directly by opencv.
|
||||
- instance: instance is a list of dict containing parsed
|
||||
annotations, which should contain the following keys:
|
||||
|
||||
- 'poly' or 'box' (textdet or textspotting)
|
||||
- 'text' (textspotting or textrecog)
|
||||
- 'ignore' (all task)
|
||||
"""
|
||||
assert isinstance(ann_paths, str)
|
||||
gt = loadmat(ann_paths)
|
||||
self.img_dir = img_paths
|
||||
samples = track_parallel_progress(
|
||||
self.parse_file,
|
||||
list(
|
||||
zip(gt['imnames'][0], gt['wordBB'][0], gt['charBB'][0],
|
||||
gt['txt'][0])),
|
||||
nproc=self.nproc)
|
||||
return samples
|
||||
|
||||
def parse_file(self, annotation: Tuple) -> Tuple:
|
||||
"""Parse single annotation."""
|
||||
img_file, wordBB, charBB, txt = annotation
|
||||
polys_list, word_list = self._match_bbox_char_str(wordBB, charBB, txt)
|
||||
|
||||
instances = list()
|
||||
for poly, word in zip(polys_list, word_list):
|
||||
instances.append(
|
||||
dict(poly=poly.flatten().tolist(), text=word, ignore=False))
|
||||
return osp.join(self.img_dir, img_file[0]), instances
|
Loading…
Reference in New Issue