[Feature] Add cute80 to dataset preparer (#1522)

pull/1533/head
Xinyu Wang 2022-11-15 19:50:32 +08:00 committed by GitHub
parent 1d5f43e79f
commit 6b2077ef19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 75 additions and 0 deletions

View File

@ -0,0 +1,29 @@
# Metadata record for the CUTE80 dataset: name, associated paper, and a
# description of the data itself.
Name: 'CUTE80'
# Bibliographic details of the publication associated with the dataset.
Paper:
Title: A Robust Arbitrary Text Detection System for Natural Scene Images
URL: http://cs-chan.com/doc/ESWA_2014A.pdf
Venue: ESWA
Year: '2014'
BibTeX: '@article{risnumawan2014robust,
title={A robust arbitrary text detection system for natural scene images},
author={Risnumawan, Anhar and Shivakumara, Palaiahankote and Chan, Chee Seng and Tan, Chew Lim},
journal={Expert Systems with Applications},
volume={41},
number={18},
pages={8027--8048},
year={2014},
publisher={Elsevier}}'
# Description of the data: download page, language, scene type, annotation
# granularity, supported tasks, license and file format.
Data:
Website: http://cs-chan.com/downloads_CUTE80_dataset.html
Language:
- English
Scene:
- Natural Scene
Granularity:
- Word
Tasks:
- textrecog
# No license type or link is recorded for this dataset.
License:
Type: N/A
Link: N/A
# NOTE(review): presumably the annotation file format - confirm.
Format: .txt

View File

@ -0,0 +1,10 @@
**Text Recognition**
```text
# timage/img_name text 1 text
timage/001.jpg RONALDO 1 RONALDO
timage/002.jpg 7 1 7
timage/003.jpg SEACREST 1 SEACREST
timage/004.jpg BEACH 1 BEACH
```

View File

@ -0,0 +1,36 @@
# Dataset-preparer configuration for CUTE80 (text recognition, test split
# only).
data_root = 'data/cute80'
cache_path = 'data/cache'

# Obtainer settings: one entry per remote file, giving its URL, the local
# name it is saved under, its md5 checksum, the split and content kind it
# belongs to, and a list of path pairs.
# NOTE(review): each `mapping` pair looks like [source, destination]
# relative to the cache/data root - confirm against NaiveDataObtainer.
data_obtainer = {
    'type': 'NaiveDataObtainer',
    'cache_path': cache_path,
    'data_root': data_root,
    'files': [
        {
            'url': ('https://download.openmmlab.com/mmocr/data/mixture/ct80/'
                    'timage.tar.gz'),
            'save_name': 'ct80.tar.gz',
            'md5': '9f3b1fe0e76f1fdfc70de3a365603d5e',
            'split': ['test'],
            'content': ['image'],
            'mapping': [['ct80/timage', 'textrecog_imgs/test']],
        },
        {
            'url': ('https://download.openmmlab.com/mmocr/data/mixture/ct80/'
                    'test_label.txt'),
            'save_name': 'ct80_test.txt',
            'md5': 'f679dec62916d3268aff9cd81990d260',
            'split': ['test'],
            'content': ['annotation'],
            'mapping': [['ct80_test.txt', 'annotations/test.txt']],
        },
    ],
}

# Converter settings for the test split. The parser is configured for
# space-separated lines with four fields (`img text ignore1 ignore2`), e.g.
# `timage/001.jpg RONALDO 1 RONALDO`.
# NOTE(review): the gatherer `mapping` value is a string containing f-string
# source, not an f-string itself - presumably evaluated later by the
# converter; confirm.
data_converter = {
    'type': 'TextRecogDataConverter',
    'splits': ['test'],
    'data_root': data_root,
    'gatherer': {'type': 'mono_gather', 'mapping': "f'{split}.txt'"},
    'parser': {
        'type': 'ICDARTxtTextRecogAnnParser',
        'separator': ' ',
        'format': 'img text ignore1 ignore2',
    },
    'dumper': {'type': 'JsonDumper'},
}