# Mirror of https://github.com/open-mmlab/mmocr.git
# 54 lines, 1.7 KiB, Python
# Dataset-preparation config for the MJSynth text-recognition data.
# Everything here is plain data (strings, lists, dicts); the `type` values
# name components that are defined outside this file.

# Dataset root directory; handed to `config_generator` below.
data_root = 'data/mjsynth'
# Download cache directory; handed to the obtainer below.
cache_path = 'data/cache'

# Preparation pipeline for the train split: obtainer -> gatherer -> parser
# -> packer -> dumper.
train_preparer = dict(
    obtainer=dict(
        type='NaiveDataObtainer',
        cache_path=cache_path,
        files=[
            # Full MJSynth archive: provides both images and annotations.
            dict(
                url='https://thor.robots.ox.ac.uk/~vgg/data/text/'
                'mjsynth.tar.gz',
                save_name='mjsynth.tar.gz',
                md5='7bf2b60ad935eaf64e5b606f782d68e5',
                split=['train'],
                content=['image', 'annotation'],
                # NOTE(review): each pair is presumably
                # [path inside the extracted archive, destination path];
                # confirm against the obtainer implementation.
                mapping=[
                    [
                        'mjsynth/mnt/ramdisk/max/90kDICT32px/*/',
                        'textrecog_imgs/train/'
                    ],
                    [
                        'mjsynth/mnt/ramdisk/max/90kDICT32px/annotation.txt',
                        'annotations/annotation.txt'
                    ]
                ]),
            # Ready-made subset annotation file (annotation only, no images).
            dict(
                url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
                'Syn90k/subset_textrecog_train.json',
                save_name='subset_textrecog_train.json',
                md5='ba958d87bb170980f39e194180c15b9e',
                split=['train'],
                content=['annotation'])
        ]),
    gatherer=dict(type='MonoGatherer', ann_name='annotation.txt'),
    # NOTE(review): `format='img num'` with `separator=' '` presumably means
    # each annotation line is "<image> <number>" split on a single space;
    # confirm against the parser implementation.
    parser=dict(
        type='MJSynthAnnParser',
        separator=' ',
        format='img num',
        remove_strs=None),
    packer=dict(type='TextRecogPacker'),
    dumper=dict(type='JsonDumper'),
)

# NOTE(review): presumably intermediate directories to remove once
# preparation has finished; confirm against the consumer of this config.
delete = ['mjsynth', 'annotations']

# Settings for the generated dataset config: two train annotation files
# (the full set and the 'sub' subset) and no test annotations.
config_generator = dict(
    type='TextRecogConfigGenerator',
    data_root=data_root,
    train_anns=[
        dict(ann_file='textrecog_train.json', dataset_postfix=''),
        dict(ann_file='subset_textrecog_train.json', dataset_postfix='sub'),
    ],
    test_anns=None)