mirror of https://github.com/open-mmlab/mmocr.git
[Feature] Add cute80 to dataset preparer (#1522)
parent
1d5f43e79f
commit
6b2077ef19
|
@ -0,0 +1,29 @@
|
|||
# Dataset metafile for CUTE80.
# NOTE(review): nesting was reconstructed from a flattened diff view; the
# placement of `Format` under `Data` in particular should be confirmed
# against the upstream metafile.
Name: 'CUTE80'
Paper:
  Title: A Robust Arbitrary Text Detection System for Natural Scene Images
  URL: http://cs-chan.com/doc/ESWA_2014A.pdf
  Venue: ESWA
  Year: '2014'
  # Multi-line single-quoted scalar: line breaks fold into single spaces.
  BibTeX: '@article{risnumawan2014robust,
    title={A robust arbitrary text detection system for natural scene images},
    author={Risnumawan, Anhar and Shivakumara, Palaiahankote and Chan, Chee Seng and Tan, Chew Lim},
    journal={Expert Systems with Applications},
    volume={41},
    number={18},
    pages={8027--8048},
    year={2014},
    publisher={Elsevier}}'
Data:
  Website: http://cs-chan.com/downloads_CUTE80_dataset.html
  Language:
    - English
  Scene:
    - Natural Scene
  Granularity:
    - Word
  Tasks:
    - textrecog
  License:
    Type: N/A
    Link: N/A
  Format: .txt
|
|
@ -0,0 +1,10 @@
|
|||
**Text Recognition**

```text
# timage/img_name text 1 text

timage/001.jpg RONALDO 1 RONALDO
timage/002.jpg 7 1 7
timage/003.jpg SEACREST 1 SEACREST
timage/004.jpg BEACH 1 BEACH
```
|
|
@ -0,0 +1,36 @@
|
|||
# Dataset-preparer config for the CUTE80 text recognition test split.
data_root = 'data/cute80'
cache_path = 'data/cache'

# Two remote files make up the test split: the image archive and its
# label file. Each entry carries an md5 checksum and a `mapping` list.
# NOTE(review): each `mapping` pair is presumably [source, destination]
# relative to `data_root` -- confirm against NaiveDataObtainer.
data_obtainer = dict(
    type='NaiveDataObtainer',
    cache_path=cache_path,
    data_root=data_root,
    files=[
        dict(
            url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
            'timage.tar.gz',
            save_name='ct80.tar.gz',
            md5='9f3b1fe0e76f1fdfc70de3a365603d5e',
            split=['test'],
            content=['image'],
            mapping=[['ct80/timage', 'textrecog_imgs/test']]),
        dict(
            url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
            'test_label.txt',
            save_name='ct80_test.txt',
            md5='f679dec62916d3268aff9cd81990d260',
            split=['test'],
            content=['annotation'],
            mapping=[['ct80_test.txt', 'annotations/test.txt']])
    ])

# Annotation lines hold four space-separated fields, e.g.
# `timage/001.jpg RONALDO 1 RONALDO`; the parser format names the last
# two `ignore1` and `ignore2`.
data_converter = dict(
    type='TextRecogDataConverter',
    splits=['test'],
    data_root=data_root,
    # The mapping is a plain string holding an f-string expression, not an
    # f-string evaluated here; `split` is not defined in this config.
    gatherer=dict(type='mono_gather', mapping="f'{split}.txt'"),
    parser=dict(
        type='ICDARTxtTextRecogAnnParser',
        separator=' ',
        format='img text ignore1 ignore2'),
    dumper=dict(type='JsonDumper'))
|
Loading…
Reference in New Issue