mirror of https://github.com/open-mmlab/mmocr.git
[Dataset Preparer] SynthText (#1779)
* [Dataset] Support Synthtext * update * update * finalize setting * fix * textrec * update * add fake magnet obtainer * update rec * update * sample_annpull/1786/head
parent
7ef34c4407
commit
c6580a48c1
|
@ -1,17 +1,8 @@
|
|||
synthtext_textdet_data_root = 'data/det/synthtext'
|
||||
synthtext_textdet_data_root = 'data/synthtext'
|
||||
|
||||
synthtext_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textdet_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
synthtext_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textdet_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,25 +1,19 @@
|
|||
synthtext_textrecog_data_root = 'data/rec/SynthText/'
|
||||
synthtext_textrecog_data_root = 'data/synthtext'
|
||||
|
||||
synthtext_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
pipeline=None)
|
||||
|
||||
synthtext_an_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='alphanumeric_train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
synthtext_sub_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='subset_train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='subset_textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
synthtext_an_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
ann_file='alphanumeric_textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
**Text Detection/Recognition/Spotting**
|
||||
|
||||
```json
|
||||
{
|
||||
"imnames": [['8/ballet_106_0.jpg', ...]],
|
||||
"wordBB": [[[420.58957 418.85016 448.08478 410.3094 117.745026
|
||||
322.30963 322.6857 159.09138 154.27284 260.14597
|
||||
431.9315 427.52274 296.86508 99.56819 108.96211 ]
|
||||
[512.3321 431.88342 519.4515 499.81183 179.0544
|
||||
377.97382 376.4993 203.64464 193.77492 313.61514
|
||||
487.58023 484.64633 365.83176 142.49403 144.90457 ]
|
||||
[511.92203 428.7077 518.7375 499.0373 172.1684
|
||||
378.35858 377.2078 203.3191 193.0739 319.69186
|
||||
485.6758 482.571 365.76303 142.31898 144.43858 ]
|
||||
[420.1795 415.67444 447.3708 409.53485 110.859024
|
||||
322.6944 323.3942 158.76585 153.57182 266.2227
|
||||
430.02707 425.44742 296.79636 99.39314 108.49613 ]]
|
||||
|
||||
[[ 21.06382 46.19922 47.570374 73.95366 197.17792
|
||||
9.993624 48.437763 9.064571 49.659035 208.57095
|
||||
118.41646 162.82489 29.548729 5.800581 28.812992 ]
|
||||
[ 23.069519 48.254295 50.130234 77.18146 208.71487
|
||||
8.999153 46.69632 9.698633 50.869553 203.25742
|
||||
122.64043 168.38647 29.660484 6.2558594 29.602367 ]
|
||||
[ 41.827087 68.39458 70.03627 98.65903 245.30832
|
||||
30.534437 68.589294 32.57161 73.74529 264.40634
|
||||
147.7303 189.70224 72.08 22.759935 50.81941 ]
|
||||
[ 39.82139 66.3395 67.47641 95.43123 233.77136
|
||||
31.528908 70.33074 31.937548 72.534775 269.71988
|
||||
143.50633 184.14066 71.96825 22.304657 50.030033 ]], ...],
|
||||
"charBB": [[[423.16126397 439.60847343 450.66887979 466.31976402 479.76190495
|
||||
504.59927448 418.80489444 450.13965942 464.16775197 480.46891089
|
||||
502.46437709 413.02373632 433.01396211 446.7222192 470.28467827
|
||||
482.51674486 116.52285438 139.51408587 150.7448586 162.03366629
|
||||
322.84717946 333.54881536 343.28386485 363.07416389 323.48968759
|
||||
337.98503283 356.66355903 160.48517048 174.1707753 189.64454066
|
||||
155.7637383 167.45490471 179.63644201 262.2183876 271.75848874
|
||||
284.05396524 298.26103738 432.8464733 449.15387392 468.07231897
|
||||
428.11482147 445.61538159 469.24565878 296.86441324 323.6603118
|
||||
344.09880401 101.14677814 110.45423597 120.54555495 131.18342618
|
||||
132.20545124 110.01673682 120.83144568 131.35885673]
|
||||
[438.2997574 452.61288403 466.31976402 482.22585715 498.3934528
|
||||
512.20555863 431.88338084 466.11639619 481.73414937 499.62012025
|
||||
519.36789779 432.51717267 449.23571387 465.73425964 484.45139112
|
||||
499.59056304 140.27413679 149.59811175 160.13352083 169.59504507
|
||||
333.55849014 344.33923741 361.08275796 378.09844418 339.92898685
|
||||
355.57692063 376.51230484 174.1707753 189.07871028 203.64462646
|
||||
165.22739457 181.27572412 193.60260894 270.99557614 283.13281739
|
||||
298.75499435 313.61511672 447.1421735 470.27065563 487.02126631
|
||||
446.97485257 468.98979567 484.64633864 317.88691577 341.16094163
|
||||
365.8300006 111.15280603 120.54555495 130.72086821 135.27663717
|
||||
142.4726875 120.1331955 133.07976304 144.75919258]
|
||||
[435.54895424 449.95797159 464.5848793 480.68235876 497.04793842
|
||||
511.1101386 428.95660757 463.61882066 480.14247127 498.2535215
|
||||
518.03243928 429.36600266 447.19056345 463.89483785 482.21016814
|
||||
498.18529977 142.63162835 152.55587851 162.80539142 172.21885945
|
||||
333.35620309 344.09880401 360.86201193 377.82379299 339.7646859
|
||||
355.37508239 376.1110999 172.46032372 187.37816388 201.39094518
|
||||
163.04321987 178.99078221 191.89681939 275.3073355 286.08373072
|
||||
301.85539131 318.57227103 444.54207279 467.53925436 485.27070558
|
||||
444.57367155 466.90671029 482.56302723 317.62908407 340.9131681
|
||||
365.44465854 109.40501176 119.4999228 129.67892444 134.35253232
|
||||
140.97421069 118.61779828 131.34019115 143.25688164]
|
||||
[420.17946701 436.74150236 448.74896556 464.5848793 478.18853922
|
||||
503.4152019 415.67442461 447.3707845 462.35927516 478.8614766
|
||||
500.86810735 409.54560397 430.77026495 444.64606264 467.79077782
|
||||
480.89051912 119.14629674 142.63162835 153.56593297 164.78799774
|
||||
322.69436747 333.35620309 343.11884239 362.84714115 323.37931952
|
||||
337.83763574 356.35573621 158.76583616 172.46032372 187.37816388
|
||||
153.57183805 165.15781218 177.92125239 266.22269514 274.45156305
|
||||
286.82608962 302.69695881 430.02705241 446.01814255 466.05208347
|
||||
425.44741792 443.19481667 466.90671029 296.79634428 323.49707084
|
||||
343.82488703 99.39315359 109.40501176 119.4999228 130.25798537
|
||||
130.70149005 108.49612777 119.08444238 129.84935461]]
|
||||
|
||||
[[ 22.26958901 21.60559248 27.0241972 27.25747678 27.45783459
|
||||
28.73896576 47.91255579 47.80732383 53.77711568 54.24219042
|
||||
52.00169325 74.79043429 80.45929285 81.04748707 76.11658669
|
||||
82.58335942 203.67278213 201.2743445 205.59358622 205.51198143
|
||||
10.06536976 10.82312635 16.77203865 16.31842372 54.80444433
|
||||
54.66492 47.33822371 15.08534083 15.18716407 9.62607092
|
||||
51.06813224 50.18928243 56.16019366 220.78902143 236.08062638
|
||||
231.69267533 209.73652786 124.25352842 119.99631725 128.73732717
|
||||
165.78411123 167.31764153 167.05531699 29.97351822 31.5116502
|
||||
31.14650552 5.88513488 12.51324147 12.57920537 8.21515307
|
||||
8.21998849 35.66412031 29.17945741 36.00660903]
|
||||
[ 22.46075572 21.76391911 27.25747678 27.49456029 27.73554156
|
||||
28.85582217 48.25428361 48.21714995 54.27828788 54.78857757
|
||||
52.4595556 75.57743634 81.15533616 81.86325615 76.681392
|
||||
83.31596322 210.04771309 203.83983042 208.00417391 207.41791524
|
||||
9.79265706 10.55231862 16.36406888 15.97405105 54.64620856
|
||||
54.49559004 47.09756263 15.18716407 15.29808166 9.69862498
|
||||
51.27597632 50.48652154 56.49239954 216.92183074 232.02141018
|
||||
226.44624213 203.25738931 125.19349641 121.32658508 130.00428964
|
||||
167.43676857 169.36588297 168.38645076 29.58279603 31.19899202
|
||||
30.75826599 5.92344996 12.57920537 12.64571832 8.23451892
|
||||
8.26856497 35.82646468 29.342662 36.22165159]
|
||||
[ 40.15739982 40.47241401 40.79219178 41.14411963 41.50190876
|
||||
41.80934074 66.81590976 68.05921213 68.6519006 69.30152766
|
||||
70.01097963 96.14641662 96.04484417 96.89110144 97.81897661
|
||||
98.62829468 237.26055111 240.35280825 243.54641271 245.04022528
|
||||
31.33842788 31.14650552 30.84702178 30.54399042 69.80098672
|
||||
68.7212013 68.62479627 32.13243303 32.34474067 32.54416771
|
||||
72.82501686 73.31372392 73.70922459 267.74318222 265.39839711
|
||||
259.52741156 253.14023308 144.60810334 145.23371653 147.69958337
|
||||
186.00278322 188.17713786 189.70144388 71.89351759 53.62266986
|
||||
54.40060855 22.41084398 22.51791234 22.62587258 17.11356079
|
||||
22.74567232 50.25232032 46.05692507 50.79345235]
|
||||
[ 39.82138755 40.18347166 40.44598236 40.79219178 41.08959901
|
||||
41.64111176 66.33948982 67.47640971 68.01403337 68.60595247
|
||||
69.3953105 95.13188979 95.21297344 95.91593691 97.08847413
|
||||
97.75212171 229.94285119 237.26055111 240.66752705 242.74145162
|
||||
31.52890731 31.33842788 31.16401306 30.81155638 69.87135926
|
||||
68.80273568 68.71664209 31.93753588 32.13243303 32.34474067
|
||||
72.53476992 72.88981775 73.28094858 269.71986636 267.92938572
|
||||
262.93698624 256.88902439 143.50635029 143.61251781 146.24080653
|
||||
184.14064261 185.86853729 188.17713786 71.96823746 53.79651809
|
||||
54.60870874 22.30465649 22.41084398 22.51791234 17.07939535
|
||||
22.63671808 50.03002471 45.81009198 50.49899163]], ...],
|
||||
"txt": [['Lines:\nI lost\nKevin ' 'will ' 'line\nand '
|
||||
'and\nthe ' '(and ' 'the\nout '
|
||||
'you ' "don't\n pkg "], ...]
|
||||
}
|
||||
```
|
|
@ -0,0 +1,30 @@
|
|||
data_root = 'data/synthtext'
|
||||
cache_path = 'data/cache'
|
||||
|
||||
train_preparer = dict(
|
||||
obtainer=dict(
|
||||
type='NaiveDataObtainer',
|
||||
cache_path=cache_path,
|
||||
files=[
|
||||
dict(
|
||||
url='magnet:?xt=urn:btih:2dba9518166cbd141534cbf381aa3e99a08'
|
||||
'7e83c&tr=https%3A%2F%2Facademictorrents.com%2Fannounce.php&t'
|
||||
'r=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2F'
|
||||
'tracker.opentrackr.org%3A1337%2Fannounce',
|
||||
save_name='SynthText.zip',
|
||||
md5='8ae0309c80ff882f9d6ba5ea62cdb556',
|
||||
split=['train'],
|
||||
content=['image', 'annotation'],
|
||||
mapping=[['SynthText/SynthText/*', 'textdet_imgs/train/'],
|
||||
['textdet_imgs/train/gt.mat', 'annotations/gt.mat']]),
|
||||
]),
|
||||
gatherer=dict(type='MonoGatherer', ann_name='gt.mat'),
|
||||
parser=dict(type='SynthTextAnnParser'),
|
||||
packer=dict(type='TextDetPacker'),
|
||||
dumper=dict(type='JsonDumper'),
|
||||
)
|
||||
|
||||
delete = ['SynthText', 'annotations']
|
||||
|
||||
config_generator = dict(
|
||||
type='TextDetConfigGenerator', data_root=data_root, test_anns=None)
|
|
@ -0,0 +1,30 @@
|
|||
_base_ = ['textdet.py']
|
||||
|
||||
_base_.train_preparer.obtainer.files.append(
|
||||
dict(
|
||||
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
|
||||
'SynthText/subset_textrecog_train.json',
|
||||
save_name='subset_textrecog_train.json',
|
||||
md5='151c4edd1cc240362046d3a6f8f4b4c6',
|
||||
split=['train'],
|
||||
content=['annotation']))
|
||||
_base_.train_preparer.obtainer.files.append(
|
||||
dict(
|
||||
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
|
||||
'SynthText/alphanumeric_textrecog_train.json',
|
||||
save_name='alphanumeric_textrecog_train.json',
|
||||
md5='89b80163435794ca117a124d081d68a9',
|
||||
split=['train'],
|
||||
content=['annotation']))
|
||||
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
|
||||
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
|
||||
|
||||
config_generator = dict(
|
||||
type='TextRecogConfigGenerator',
|
||||
train_anns=[
|
||||
dict(ann_file='textrecog_train.json', dataset_postfix=''),
|
||||
dict(ann_file='subset_textrecog_train.json', dataset_postfix='sub'),
|
||||
dict(
|
||||
ann_file='alphanumeric_textrecog_train.json',
|
||||
dataset_postfix='an'),
|
||||
])
|
|
@ -6,28 +6,27 @@ This page is a manual preparation guide for datasets not yet supported by [Datas
|
|||
|
||||
## Overview
|
||||
|
||||
| Dataset | images | annotation file | annotation file |
|
||||
| :-------------------: | :---------------------------------------------------: | :-------------------------------------------------------------: | :-------------------------------------------------------------: |
|
||||
| | | training | test |
|
||||
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - |
|
||||
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - |
|
||||
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - |
|
||||
| SynthText (Synth800k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | [alphanumeric_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/alphanumeric_train_labels.json) \|[subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/train_labels.json) | - |
|
||||
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - |
|
||||
| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |
|
||||
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - |
|
||||
| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - |
|
||||
| LSVT | [homepage](https://rrc.cvc.uab.es/?ch=16) | - | - |
|
||||
| IMGUR | [homepage](https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset) | - | - |
|
||||
| KAIST | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/KAIST_Scene_Text_Database) | - | - |
|
||||
| MTWI | [homepage](https://tianchi.aliyun.com/competition/entrance/231685/information?lang=en-us) | - | - |
|
||||
| ReCTS | [homepage](https://rrc.cvc.uab.es/?ch=12) | - | - |
|
||||
| IIIT-ILST | [homepage](http://cvit.iiit.ac.in/research/projects/cvit-projects/iiit-ilst) | - | - |
|
||||
| VinText | [homepage](https://github.com/VinAIResearch/dict-guided) | - | - |
|
||||
| BID | [homepage](https://github.com/ricardobnjunior/Brazilian-Identity-Document-Dataset) | - | - |
|
||||
| RCTW | [homepage](https://rctw.vlrlab.net/index.html) | - | - |
|
||||
| HierText | [homepage](https://github.com/google-research-datasets/hiertext) | - | - |
|
||||
| ArT | [homepage](https://rrc.cvc.uab.es/?ch=14) | - | - |
|
||||
| Dataset | images | annotation file | annotation file |
|
||||
| :--------------: | :-----------------------------------------------------: | :--------------------------------------------------------------: | :---------------------------------------------------------------: |
|
||||
| | | training | test |
|
||||
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - |
|
||||
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - |
|
||||
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - |
|
||||
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - |
|
||||
| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |
|
||||
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - |
|
||||
| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - |
|
||||
| LSVT | [homepage](https://rrc.cvc.uab.es/?ch=16) | - | - |
|
||||
| IMGUR | [homepage](https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset) | - | - |
|
||||
| KAIST | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/KAIST_Scene_Text_Database) | - | - |
|
||||
| MTWI | [homepage](https://tianchi.aliyun.com/competition/entrance/231685/information?lang=en-us) | - | - |
|
||||
| ReCTS | [homepage](https://rrc.cvc.uab.es/?ch=12) | - | - |
|
||||
| IIIT-ILST | [homepage](http://cvit.iiit.ac.in/research/projects/cvit-projects/iiit-ilst) | - | - |
|
||||
| VinText | [homepage](https://github.com/VinAIResearch/dict-guided) | - | - |
|
||||
| BID | [homepage](https://github.com/ricardobnjunior/Brazilian-Identity-Document-Dataset) | - | - |
|
||||
| RCTW | [homepage](https://rctw.vlrlab.net/index.html) | - | - |
|
||||
| HierText | [homepage](https://github.com/google-research-datasets/hiertext) | - | - |
|
||||
| ArT | [homepage](https://rrc.cvc.uab.es/?ch=14) | - | - |
|
||||
|
||||
(\*) Since the official homepage is unavailable now, we provide an alternative for quick reference. However, we do not guarantee the correctness of the dataset.
|
||||
|
||||
|
@ -149,52 +148,6 @@ Please make sure you're using the right annotation to train the model by checkin
|
|||
│ └── mnt
|
||||
```
|
||||
|
||||
## SynthText (Synth800k)
|
||||
|
||||
- Step1: Download `SynthText.zip` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/)
|
||||
|
||||
- Step2: According to your actual needs, download the most appropriate one from the following options: [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/train_labels.json) (7,266,686 annotations), [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/subset_train_labels.json) (2,400,000 randomly sampled annotations) and [alphanumeric_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/SynthText/alphanumeric_train_labels.json) (7,239,272 annotations with alphanumeric characters only).
|
||||
|
||||
```{warning}
|
||||
Please make sure you're using the right annotation to train the model by checking its dataset specs in Model Zoo.
|
||||
```
|
||||
|
||||
- Step3:
|
||||
|
||||
```bash
|
||||
mkdir SynthText && cd SynthText
|
||||
mv /path/to/SynthText.zip .
|
||||
unzip SynthText.zip
|
||||
mv SynthText synthtext
|
||||
|
||||
mv /path/to/subset_train_labels.json .
|
||||
mv /path/to/train_labels.json .
|
||||
mv /path/to/alphanumeric_train_labels.json .
|
||||
|
||||
# create soft link
|
||||
cd /path/to/mmocr/data/recog
|
||||
ln -s /path/to/SynthText SynthText
|
||||
```
|
||||
|
||||
- Step4: Generate cropped images and labels:
|
||||
|
||||
```bash
|
||||
cd /path/to/mmocr
|
||||
|
||||
python tools/dataset_converters/textrecog/synthtext_converter.py data/recog/SynthText/gt.mat data/recog/SynthText/ data/recog/SynthText/synthtext/SynthText_patch_horizontal --n_proc 8
|
||||
```
|
||||
|
||||
- After running the above codes, the directory structure
|
||||
should be as follows:
|
||||
|
||||
```text
|
||||
├── SynthText
|
||||
│ ├── alphanumeric_train_labels.json
|
||||
│ ├── subset_train_labels.json
|
||||
│ ├── train_labels.json
|
||||
│ └── synthtext
|
||||
```
|
||||
|
||||
## SynthAdd
|
||||
|
||||
- Step1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code: 627x)
|
||||
|
|
|
@ -88,8 +88,18 @@ class NaiveDataObtainer:
|
|||
' Please manually download the required files'
|
||||
' following the guides.')
|
||||
|
||||
print(f'Start to download {osp.basename(dst_path)}...')
|
||||
print('If you stuck here for a long time, please check your network.')
|
||||
if url.startswith('magnet'):
|
||||
raise NotImplementedError('Please use any BitTorrent client to '
|
||||
'download the following magnet link to '
|
||||
f'{osp.abspath(dst_path)} and '
|
||||
f'try again.\nLink: {url}')
|
||||
|
||||
print('Downloading...')
|
||||
print(f'URL: {url}')
|
||||
print(f'Destination: {osp.abspath(dst_path)}')
|
||||
print('If you stuck here for a long time, please check your network, '
|
||||
'or manually download the file to the destination path and '
|
||||
'run the script again.')
|
||||
request.urlretrieve(url, dst_path, progress)
|
||||
print('')
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
|
|||
from .naf_parser import NAFAnnParser
|
||||
from .sroie_parser import SROIETextDetAnnParser
|
||||
from .svt_parser import SVTTextDetAnnParser
|
||||
from .synthtext_parser import SynthTextAnnParser
|
||||
from .totaltext_parser import TotaltextTextDetAnnParser
|
||||
from .wildreceipt_parser import WildreceiptKIEAnnParser
|
||||
|
||||
|
@ -15,5 +16,6 @@ __all__ = [
|
|||
'BaseParser', 'ICDARTxtTextDetAnnParser', 'ICDARTxtTextRecogAnnParser',
|
||||
'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
|
||||
'COCOTextDetAnnParser', 'SVTTextDetAnnParser', 'FUNSDTextDetAnnParser',
|
||||
'SROIETextDetAnnParser', 'NAFAnnParser', 'CTW1500AnnParser'
|
||||
'SROIETextDetAnnParser', 'NAFAnnParser', 'CTW1500AnnParser',
|
||||
'SynthTextAnnParser'
|
||||
]
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from mmengine import track_parallel_progress
|
||||
from scipy.io import loadmat
|
||||
|
||||
from mmocr.utils import is_type_list
|
||||
from ..data_preparer import DATA_PARSERS
|
||||
from .base import BaseParser
|
||||
|
||||
|
||||
@DATA_PARSERS.register_module()
|
||||
class SynthTextAnnParser(BaseParser):
|
||||
"""SynthText Text Detection Annotation Parser.
|
||||
|
||||
Args:
|
||||
split (str): The split of the dataset. It is usually set automatically
|
||||
and users do not need to set it manually in config file in most
|
||||
cases.
|
||||
nproc (int): Number of processes to process the data. Defaults to 1.
|
||||
It is usually set automatically and users do not need to set it
|
||||
manually in config file in most cases.
|
||||
separator (str): The separator between each element in a line. Defaults
|
||||
to ','.
|
||||
ignore (str): The text to be ignored. Defaults to '###'.
|
||||
format (str): The format of the annotation. Defaults to
|
||||
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
|
||||
encoding (str): The encoding of the annotation file. Defaults to
|
||||
            'utf-8'.
|
||||
remove_strs (List[str], Optional): Used to remove redundant strings in
|
||||
the transcription. Defaults to None.
|
||||
mode (str, optional): The mode of the box converter. Supported modes
|
||||
are 'xywh' and 'xyxy'. Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
split: str,
|
||||
nproc: int,
|
||||
separator: str = ',',
|
||||
ignore: str = '###',
|
||||
format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
|
||||
encoding: str = 'utf-8',
|
||||
remove_strs: Optional[List[str]] = None,
|
||||
mode: str = None) -> None:
|
||||
self.sep = separator
|
||||
self.format = format
|
||||
self.encoding = encoding
|
||||
self.ignore = ignore
|
||||
self.mode = mode
|
||||
self.remove_strs = remove_strs
|
||||
super().__init__(split=split, nproc=nproc)
|
||||
|
||||
def _trace_boundary(self, char_boxes: List[np.ndarray]) -> np.ndarray:
|
||||
"""Trace the boundary point of text.
|
||||
|
||||
Args:
|
||||
char_boxes (list[ndarray]): The char boxes for one text. Each
|
||||
element is 4x2 ndarray.
|
||||
|
||||
Returns:
|
||||
ndarray: The boundary point sets with size nx2.
|
||||
"""
|
||||
assert is_type_list(char_boxes, np.ndarray)
|
||||
|
||||
        # from top left to top right
|
||||
p_top = [box[0:2] for box in char_boxes]
|
||||
# from bottom right to bottom left
|
||||
p_bottom = [
|
||||
char_boxes[idx][[2, 3], :]
|
||||
for idx in range(len(char_boxes) - 1, -1, -1)
|
||||
]
|
||||
|
||||
p = p_top + p_bottom
|
||||
|
||||
boundary = np.concatenate(p).astype(int)
|
||||
|
||||
return boundary
|
||||
|
||||
def _match_bbox_char_str(self, bboxes: np.ndarray, char_bboxes: np.ndarray,
|
||||
strs: np.ndarray
|
||||
) -> Tuple[List[np.ndarray], List[str]]:
|
||||
"""Match the bboxes, char bboxes, and strs.
|
||||
|
||||
Args:
|
||||
bboxes (ndarray): The text boxes of size (2, 4, num_box).
|
||||
char_bboxes (ndarray): The char boxes of size (2, 4, num_char_box).
|
||||
strs (ndarray): The string of size (num_strs,)
|
||||
|
||||
Returns:
|
||||
Tuple(List[ndarray], List[str]): Polygon & word list.
|
||||
"""
|
||||
assert isinstance(bboxes, np.ndarray)
|
||||
assert isinstance(char_bboxes, np.ndarray)
|
||||
assert isinstance(strs, np.ndarray)
|
||||
# bboxes = bboxes.astype(np.int32)
|
||||
char_bboxes = char_bboxes.astype(np.int32)
|
||||
|
||||
if len(char_bboxes.shape) == 2:
|
||||
char_bboxes = np.expand_dims(char_bboxes, axis=2)
|
||||
char_bboxes = np.transpose(char_bboxes, (2, 1, 0))
|
||||
num_boxes = 1 if len(bboxes.shape) == 2 else bboxes.shape[-1]
|
||||
|
||||
poly_charbox_list = [[] for _ in range(num_boxes)]
|
||||
|
||||
words = []
|
||||
for line in strs:
|
||||
words += line.split()
|
||||
words_len = [len(w) for w in words]
|
||||
words_end_inx = np.cumsum(words_len)
|
||||
start_inx = 0
|
||||
for word_inx, end_inx in enumerate(words_end_inx):
|
||||
for char_inx in range(start_inx, end_inx):
|
||||
poly_charbox_list[word_inx].append(char_bboxes[char_inx])
|
||||
start_inx = end_inx
|
||||
|
||||
for box_inx in range(num_boxes):
|
||||
assert len(poly_charbox_list[box_inx]) > 0
|
||||
|
||||
poly_boundary_list = []
|
||||
for item in poly_charbox_list:
|
||||
boundary = np.ndarray((0, 2))
|
||||
if len(item) > 0:
|
||||
boundary = self._trace_boundary(item)
|
||||
poly_boundary_list.append(boundary)
|
||||
|
||||
return poly_boundary_list, words
|
||||
|
||||
def parse_files(self, img_paths: Union[List[str], str],
|
||||
ann_paths: Union[List[str], str]) -> List[Tuple]:
|
||||
"""Convert annotations to MMOCR format.
|
||||
|
||||
Args:
|
||||
img_paths (str or list[str]): the list of image paths or the
|
||||
directory of the images.
|
||||
ann_paths (str or list[str]): the list of annotation paths or the
|
||||
path of the annotation file which contains all the annotations.
|
||||
|
||||
Returns:
|
||||
List[Tuple]: A list of a tuple of (image_path, instances).
|
||||
|
||||
- img_path (str): The path of image file, which can be read
|
||||
directly by opencv.
|
||||
- instance: instance is a list of dict containing parsed
|
||||
annotations, which should contain the following keys:
|
||||
|
||||
- 'poly' or 'box' (textdet or textspotting)
|
||||
- 'text' (textspotting or textrecog)
|
||||
- 'ignore' (all task)
|
||||
"""
|
||||
assert isinstance(ann_paths, str)
|
||||
gt = loadmat(ann_paths)
|
||||
self.img_dir = img_paths
|
||||
samples = track_parallel_progress(
|
||||
self.parse_file,
|
||||
list(
|
||||
zip(gt['imnames'][0], gt['wordBB'][0], gt['charBB'][0],
|
||||
gt['txt'][0])),
|
||||
nproc=self.nproc)
|
||||
return samples
|
||||
|
||||
def parse_file(self, annotation: Tuple) -> Tuple:
|
||||
"""Parse single annotation."""
|
||||
img_file, wordBB, charBB, txt = annotation
|
||||
polys_list, word_list = self._match_bbox_char_str(wordBB, charBB, txt)
|
||||
|
||||
instances = list()
|
||||
for poly, word in zip(polys_list, word_list):
|
||||
instances.append(
|
||||
dict(poly=poly.flatten().tolist(), text=word, ignore=False))
|
||||
return osp.join(self.img_dir, img_file[0]), instances
|
Loading…
Reference in New Issue