# mmocr/dataset_zoo/svt/textdet.py
#
# 31 lines
# 1.0 KiB
# Python

# Dataset-preparation settings for the SVT text-detection task.
#
# Each ``type`` string names a component that is defined elsewhere in the
# project; what those components do with the remaining keys is not visible
# from this file.

# Paths shared by the sections below.
data_root = 'data/svt'
cache_path = 'data/cache'

# Download description: a single archive, identified by URL and MD5 digest.
data_obtainer = {
    'type': 'NaiveDataObtainer',
    'cache_path': cache_path,
    'data_root': data_root,
    'files': [
        {
            'url': 'http://www.iapr-tc11.org/dataset/SVT/svt.zip',
            'save_name': 'svt.zip',
            'md5': '42d19160010d990ae6223b14f45eff88',
            'split': ['train', 'test'],
            'content': ['image', 'annotations'],
            # NOTE(review): presumably [path inside the archive, destination
            # path] pairs -- confirm against the obtainer implementation.
            'mapping': [
                ['svt/svt1/train.xml', 'annotations/train.xml'],
                ['svt/svt1/test.xml', 'annotations/test.xml'],
                ['svt/svt1/img', 'textdet_imgs/img'],
            ],
        },
    ],
}

# Conversion description: gatherer, parser and dumper sub-configs for the
# 'train' and 'test' splits.
data_converter = {
    'type': 'TextDetDataConverter',
    'splits': ['train', 'test'],
    'data_root': data_root,
    'gatherer': {
        'type': 'mono_gather',
        'train_ann': 'train.xml',
        'test_ann': 'test.xml',
    },
    'parser': {'type': 'SVTTextDetAnnParser', 'data_root': data_root},
    'dumper': {'type': 'JsonDumper'},
    # NOTE(review): presumably entries to remove once conversion has
    # finished -- confirm against the converter implementation.
    'delete': ['annotations', 'svt'],
}

# Config-generator description; it is given the same dataset root.
config_generator = {'type': 'TextDetConfigGenerator', 'data_root': data_root}