diff --git a/dataset_zoo/cute80/metafile.yml b/dataset_zoo/cute80/metafile.yml
new file mode 100644
index 00000000..32f3a62a
--- /dev/null
+++ b/dataset_zoo/cute80/metafile.yml
@@ -0,0 +1,29 @@
+Name: 'CUTE80'
+Paper:
+  Title: A Robust Arbitrary Text Detection System for Natural Scene Images
+  URL: http://cs-chan.com/doc/ESWA_2014A.pdf
+  Venue: ESWA
+  Year: '2014'
+  BibTeX: '@article{risnumawan2014robust,
+  title={A robust arbitrary text detection system for natural scene images},
+  author={Risnumawan, Anhar and Shivakumara, Palaiahankote and Chan, Chee Seng and Tan, Chew Lim},
+  journal={Expert Systems with Applications},
+  volume={41},
+  number={18},
+  pages={8027--8048},
+  year={2014},
+  publisher={Elsevier}}'
+Data:
+  Website: http://cs-chan.com/downloads_CUTE80_dataset.html
+  Language:
+    - English
+  Scene:
+    - Natural Scene
+  Granularity:
+    - Word
+  Tasks:
+    - textrecog
+  License:
+    Type: N/A
+    Link: N/A
+  Format: .txt
diff --git a/dataset_zoo/cute80/sample_anno.md b/dataset_zoo/cute80/sample_anno.md
new file mode 100644
index 00000000..0f08bd6e
--- /dev/null
+++ b/dataset_zoo/cute80/sample_anno.md
@@ -0,0 +1,10 @@
+**Text Recognition**
+
+```text
+# timage/img_name text 1 text
+
+timage/001.jpg RONALDO 1 RONALDO
+timage/002.jpg 7 1 7
+timage/003.jpg SEACREST 1 SEACREST
+timage/004.jpg BEACH 1 BEACH
+```
diff --git a/dataset_zoo/cute80/textrecog.py b/dataset_zoo/cute80/textrecog.py
new file mode 100644
index 00000000..3d230a4b
--- /dev/null
+++ b/dataset_zoo/cute80/textrecog.py
@@ -0,0 +1,36 @@
+data_root = 'data/cute80'
+cache_path = 'data/cache'
+
+data_obtainer = dict(
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(
+            url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
+            'timage.tar.gz',
+            save_name='ct80.tar.gz',
+            md5='9f3b1fe0e76f1fdfc70de3a365603d5e',
+            split=['test'],
+            content=['image'],
+            mapping=[['ct80/timage', 'textrecog_imgs/test']]),
+        dict(
+            url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
+            'test_label.txt',
+            save_name='ct80_test.txt',
+            md5='f679dec62916d3268aff9cd81990d260',
+            split=['test'],
+            content=['annotation'],
+            mapping=[['ct80_test.txt', 'annotations/test.txt']])
+    ])
+
+data_converter = dict(
+    type='TextRecogDataConverter',
+    splits=['test'],
+    data_root=data_root,
+    gatherer=dict(type='mono_gather', mapping="f'{split}.txt'"),
+    parser=dict(
+        type='ICDARTxtTextRecogAnnParser',
+        separator=' ',
+        format='img text ignore1 ignore2'),
+    dumper=dict(type='JsonDumper'))