[Feature] Add cute80 to dataset preparer (#1522)

2022-11-15 19:50:32 +08:00 · 2022-11-15 19:50:32 +08:00 · 6b2077ef19
parent 1d5f43e79f
commit 6b2077ef19
3 changed files with 75 additions and 0 deletions
--- a/dataset_zoo/cute80/metafile.yml
+++ b/dataset_zoo/cute80/metafile.yml
@@ -0,0 +1,29 @@
+Name: 'CUTE80'
+Paper:
+  Title: A Robust Arbitrary Text Detection System for Natural Scene Images
+  URL: http://cs-chan.com/doc/ESWA_2014A.pdf
+  Venue: ESWA
+  Year: '2014'
+  BibTeX: '@article{risnumawan2014robust,
+  title={A robust arbitrary text detection system for natural scene images},
+  author={Risnumawan, Anhar and Shivakumara, Palaiahankote and Chan, Chee Seng and Tan, Chew Lim},
+  journal={Expert Systems with Applications},
+  volume={41},
+  number={18},
+  pages={8027--8048},
+  year={2014},
+  publisher={Elsevier}}'
+Data:
+  Website: http://cs-chan.com/downloads_CUTE80_dataset.html
+  Language:
+    - English
+  Scene:
+    - Natural Scene
+  Granularity:
+    - Word
+  Tasks:
+    - textrecog
+  License:
+    Type: N/A
+    Link: N/A
+  Format: .txt
--- a/dataset_zoo/cute80/sample_anno.md
+++ b/dataset_zoo/cute80/sample_anno.md
@@ -0,0 +1,10 @@
+**Text Recognition**
+
+```text
+# timage/img_name text 1 text
+
+timage/001.jpg RONALDO 1 RONALDO
+timage/002.jpg 7 1 7
+timage/003.jpg SEACREST 1 SEACREST
+timage/004.jpg BEACH 1 BEACH
+```
--- a/dataset_zoo/cute80/textrecog.py
+++ b/dataset_zoo/cute80/textrecog.py
@@ -0,0 +1,36 @@
+data_root = 'data/cute80'  # reused by data_obtainer and data_converter below
+cache_path = 'data/cache'  # reused by data_obtainer below
+
+data_obtainer = dict(  # two downloads, both tagged split=['test']
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(  # entry 1: the image archive (content=['image'])
+            url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
+            'timage.tar.gz',
+            save_name='ct80.tar.gz',  # local name differs from the remote 'timage.tar.gz'
+            md5='9f3b1fe0e76f1fdfc70de3a365603d5e',
+            split=['test'],
+            content=['image'],
+            mapping=[['ct80/timage', 'textrecog_imgs/test']]),  # presumably [src, dst] pairs; confirm
+        dict(  # entry 2: the label file (content=['annotation'])
+            url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
+            'test_label.txt',
+            save_name='ct80_test.txt',
+            md5='f679dec62916d3268aff9cd81990d260',
+            split=['test'],
+            content=['annotation'],
+            mapping=[['ct80_test.txt', 'annotations/test.txt']])  # presumably [src, dst]; confirm
+    ])
+
+data_converter = dict(  # configured for the 'test' split only
+    type='TextRecogDataConverter',
+    splits=['test'],
+    data_root=data_root,
+    gatherer=dict(type='mono_gather', mapping="f'{split}.txt'"),  # a plain str holding f-string source; presumably evaluated later, confirm
+    parser=dict(
+        type='ICDARTxtTextRecogAnnParser',
+        separator=' ',  # sample lines such as 'timage/001.jpg RONALDO 1 RONALDO' are space-separated
+        format='img text ignore1 ignore2'),  # four fields, matching the four tokens per sample line
+    dumper=dict(type='JsonDumper'))