[Refactor] Refactor data converter and gather (#1707)

* Refactor data preparer; abstract gatherer and packer

* update ic13 ic15 naf iiit5k cute80 funsd

* update dataset zoo config

* add ut

* finish docstring

* fix coco

* fix comment
liukuikun 2023-03-03 15:27:19 +08:00 committed by GitHub
parent 3aa9572a64
commit 82f81ff67c
94 changed files with 3321 additions and 2069 deletions
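
The refactor replaces the monolithic data_converter with per-split preparers: every dataset config below now declares a train_preparer / val_preparer / test_preparer, each wiring together five pluggable stages (obtainer, gatherer, parser, packer, dumper). A minimal sketch of the new layout, with placeholder values rather than a real dataset entry:

data_root = 'data/<dataset>'
cache_path = 'data/cache'
train_preparer = dict(
    # Downloads archives, verifies their md5, and moves files into place.
    obtainer=dict(type='NaiveDataObtainer', cache_path=cache_path, files=[]),
    # Collects samples, e.g. pairing each image with its annotation file.
    gatherer=dict(type='PairGatherer', img_suffixes=['.jpg'], rule=[]),
    # Parses the raw annotations into an intermediate representation.
    parser=dict(type='ICDARTxtTextDetAnnParser'),
    # Packs parsed samples into the target task's annotation structure.
    packer=dict(type='TextDetPacker'),
    # Serializes the packed annotations to disk.
    dumper=dict(type='JsonDumper'))
delete = ['annotations']  # intermediate artifacts removed after preparation
config_generator = dict(type='TextDetConfigGenerator')

Assuming the standard MMOCR 1.x entry point (which is not part of this diff), such a config is driven by tools/dataset_converters/prepare_dataset.py with the dataset name and a --task flag.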


@@ -1,41 +1,39 @@
data_root = 'data/cocotextv2'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='http://images.cocodataset.org/zips/train2014.zip',
save_name='cocotextv2_train_img.zip',
md5='0da8c0bd3d6becc4dcb32757491aca88',
split=['train', 'val'],
content=['image'],
mapping=[['cocotextv2_train_img/train2014',
'textdet_imgs/train']]),
dict(
url='https://github.com/bgshih/cocotext/releases/download/dl/'
'cocotext.v2.zip',
save_name='cocotextv2_annotation.zip',
md5='5e39f7d6f2f11324c6451e63523c440c',
split=['train', 'val'],
content=['annotation'],
mapping=[[
'cocotextv2_annotation/cocotext.v2.json',
'annotations/train.json'
]]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train'],
data_root=data_root,
gatherer=dict(type='mono_gather', train_ann='train.json'),
parser=dict(
type='COCOTextDetAnnParser',
variant='cocotext',
data_root=data_root + '/textdet_imgs/train'),
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='http://images.cocodataset.org/zips/train2014.zip',
save_name='cocotextv2_train_img.zip',
md5='0da8c0bd3d6becc4dcb32757491aca88',
content=['image'],
mapping=[[
'cocotextv2_train_img/train2014', 'textdet_imgs/imgs'
]]),
dict(
url='https://github.com/bgshih/cocotext/releases/download/dl/'
'cocotext.v2.zip',
save_name='cocotextv2_annotation.zip',
md5='5e39f7d6f2f11324c6451e63523c440c',
content=['annotation'],
mapping=[[
'cocotextv2_annotation/cocotext.v2.json',
'annotations/train.json'
]]),
]),
gatherer=dict(
type='MonoGatherer',
ann_name='train.json',
img_dir='textdet_imgs/imgs'),
parser=dict(type='COCOTextDetAnnParser', variant='cocotext'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'))
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
val_preparer = train_preparer
delete = ['annotations', 'cocotextv2_annotation', 'cocotextv2_train_img']
config_generator = dict(type='TextDetConfigGenerator')


@@ -1,5 +1,6 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextRecogCropConverter')
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.val_preparer.packer.type = 'TextRecogCropPacker'
config_generator = dict(type='TextRecogConfigGenerator')
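
The _base_.train_preparer.packer.type assignments rely on mmengine's Python-style config inheritance: this file inherits everything from textdet.py and patches only the packer, so detection and recognition share a single obtainer/gatherer/parser definition. A sketch of the equivalent dict-merge override (assuming mmengine's default merge semantics), shown as an alternative form rather than what the file actually uses:

_base_ = ['textdet.py']
# Child dicts merge into the inherited ones, so only packer.type changes.
train_preparer = dict(packer=dict(type='TextRecogCropPacker'))
val_preparer = dict(packer=dict(type='TextRecogCropPacker'))
config_generator = dict(type='TextRecogConfigGenerator')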


@@ -1,5 +1,6 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -2,51 +2,65 @@
# the fixed version as done in
# https://github.com/clovaai/deep-text-recognition-benchmark by default.
# If you want to use the original version, please comment out the following
# lines: L31-L38, and uncomment L23-L30, L40-L49.
# lines: L10-L31, and uncomment L33-L63.
data_root = 'data/cute80'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
'timage.tar.gz',
save_name='ct80.tar.gz',
md5='9f3b1fe0e76f1fdfc70de3a365603d5e',
split=['test'],
content=['image'],
mapping=[['ct80/timage', 'textrecog_imgs/test']]),
# dict(
# url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
# 'test_label.txt',
# save_name='ct80_test.txt',
# md5='f679dec62916d3268aff9cd81990d260',
# split=['test'],
# content=['annotation'],
# mapping=[['ct80_test.txt', 'annotations/test.txt']])
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/ct80/'
'textrecog_test.json',
save_name='textrecog_test.json',
md5='9c5c79d843b900325e7fd453b318cad9',
split=['test'],
content=['annotation'])
])
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
'timage.tar.gz',
save_name='ct80.tar.gz',
md5='9f3b1fe0e76f1fdfc70de3a365603d5e',
split=['test'],
content=['image'],
mapping=[['ct80/timage', 'textrecog_imgs/test']]),
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/ct80/'
'textrecog_test.json',
save_name='textrecog_test.json',
md5='9c5c79d843b900325e7fd453b318cad9',
split=['test'],
content=['annotation'])
]))
# data_converter = dict(
# type='TextRecogDataConverter',
# splits=['test'],
# data_root=data_root,
# gatherer=dict(type='mono_gather', test_ann='test.txt'),
# test_preparer = dict(
# obtainer=dict(
# type='NaiveDataObtainer',
# cache_path=cache_path,
# data_root=data_root,
# files=[
# dict(
# url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
# 'timage.tar.gz',
# save_name='ct80.tar.gz',
# md5='9f3b1fe0e76f1fdfc70de3a365603d5e',
# split=['test'],
# content=['image'],
# mapping=[['ct80/timage', 'textrecog_imgs/test']]),
# dict(
# url='https://download.openmmlab.com/mmocr/data/mixture/ct80/'
# 'test_label.txt',
# save_name='ct80_test.txt',
# md5='f679dec62916d3268aff9cd81990d260',
# split=['test'],
# content=['annotation'],
# mapping=[['ct80_test.txt', 'annotations/test.txt']])
# ]),
# gatherer=dict(type='MonoGatherer', ann_name='test.txt'),
# parser=dict(
# type='ICDARTxtTextRecogAnnParser',
# separator=' ',
# format='img text ignore1 ignore2'),
# dumper=dict(type='JsonDumper'))
# packer=dict(type='TextRecogPacker'),
# dumper=dict(type='JsonDumper'),
# )
delete = ['ct80']
config_generator = dict(
type='TextRecogConfigGenerator', data_root=data_root, train_anns=None)
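
The commented-out preparer above declares format='img text ignore1 ignore2' for ICDARTxtTextRecogAnnParser: positional names for each separator-delimited token, where the ignore* fields are presumably discarded. A rough sketch of that field binding, using a hypothetical annotation line:

# Hypothetical line from ct80_test.txt; only img and text are kept.
line = 'timage/001.jpg word 1 80'
fields = dict(zip('img text ignore1 ignore2'.split(), line.split(' ')))
img_path, transcription = fields['img'], fields['text']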


@@ -1,38 +1,62 @@
data_root = 'data/funsd'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://guillaumejaume.github.io/FUNSD/dataset.zip',
save_name='funsd.zip',
md5='e05de47de238aa343bf55d8807d659a9',
split=['train', 'test'],
content=['image', 'annotation'],
mapping=[
['funsd/dataset/training_data/images', 'textdet_imgs/train'],
['funsd/dataset/testing_data/images', 'textdet_imgs/test'],
[
'funsd/dataset/training_data/annotations',
'annotations/train'
],
['funsd/dataset/testing_data/annotations', 'annotations/test'],
]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'test'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://guillaumejaume.github.io/FUNSD/dataset.zip',
save_name='funsd.zip',
md5='e05de47de238aa343bf55d8807d659a9',
content=['image', 'annotation'],
mapping=[
[
'funsd/dataset/training_data/images',
'textdet_imgs/train'
],
[
'funsd/dataset/training_data/annotations',
'annotations/train'
],
]),
]),
gatherer=dict(
type='pair_gather',
suffixes=['.png'],
type='PairGatherer',
img_suffixes=['.png'],
rule=[r'(\w+)\.png', r'\1.json']),
parser=dict(type='FUNSDTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
delete=['annotations', 'funsd'])
)
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://guillaumejaume.github.io/FUNSD/dataset.zip',
save_name='funsd.zip',
md5='e05de47de238aa343bf55d8807d659a9',
content=['image', 'annotation'],
mapping=[
['funsd/dataset/testing_data/images', 'textdet_imgs/test'],
[
'funsd/dataset/testing_data/annotations',
'annotations/test'
],
]),
]),
gatherer=dict(
type='PairGatherer',
img_suffixes=['.png'],
rule=[r'(\w+)\.png', r'\1.json']),
parser=dict(type='FUNSDTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
)
delete = ['annotations', 'funsd']
config_generator = dict(type='TextDetConfigGenerator')
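
The gatherer's rule is a regex pair: the first pattern matches an image filename and the second is the substitution template that yields the corresponding annotation filename. A runnable illustration (the filename is hypothetical):

import re

pattern, template = r'(\w+)\.png', r'\1.json'
print(re.sub(pattern, template, '0000971160.png'))  # -> 0000971160.json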


@@ -1,5 +1,9 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextRecogCropConverter')
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.test_preparer.packer.type = 'TextRecogCropPacker'
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,5 +1,8 @@
_base_ = ['textdet.py']
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,52 +1,29 @@
data_root = 'data/icdar2013'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Training_Task12_Images.zip',
save_name='ic13_textdet_train_img.zip',
md5='a443b9649fda4229c9bc52751bad08fb',
split=['train'],
content=['image'],
mapping=[['ic13_textdet_train_img', 'textdet_imgs/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Test_Task12_Images.zip',
save_name='ic13_textdet_test_img.zip',
md5='af2e9f070c4c6a1c7bdb7b36bacf23e3',
split=['test'],
content=['image'],
mapping=[['ic13_textdet_test_img', 'textdet_imgs/test']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Training_Task1_GT.zip',
save_name='ic13_textdet_train_gt.zip',
md5='f3a425284a66cd67f455d389c972cce4',
split=['train'],
content=['annotation'],
mapping=[['ic13_textdet_train_gt', 'annotations/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Test_Task1_GT.zip',
save_name='ic13_textdet_test_gt.zip',
md5='3191c34cd6ac28b60f5a7db7030190fb',
split=['test'],
content=['annotation'],
mapping=[['ic13_textdet_test_gt', 'annotations/test']]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'test'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Training_Task12_Images.zip',
save_name='ic13_textdet_train_img.zip',
md5='a443b9649fda4229c9bc52751bad08fb',
content=['image'],
mapping=[['ic13_textdet_train_img', 'textdet_imgs/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Training_Task1_GT.zip',
save_name='ic13_textdet_train_gt.zip',
md5='f3a425284a66cd67f455d389c972cce4',
content=['annotation'],
mapping=[['ic13_textdet_train_gt', 'annotations/train']]),
]),
gatherer=dict(
type='pair_gather',
suffixes=['.jpg'],
type='PairGatherer',
img_suffixes=['.jpg'],
rule=[r'(\w+)\.jpg', r'gt_\1.txt']),
parser=dict(
type='ICDARTxtTextDetAnnParser',
@@ -54,6 +31,45 @@ data_converter = dict(
format='x1 y1 x2 y2 trans',
separator=' ',
mode='xyxy'),
dumper=dict(type='JsonDumper'))
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
)
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Test_Task12_Images.zip',
save_name='ic13_textdet_test_img.zip',
md5='af2e9f070c4c6a1c7bdb7b36bacf23e3',
content=['image'],
mapping=[['ic13_textdet_test_img', 'textdet_imgs/test']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Test_Task1_GT.zip',
save_name='ic13_textdet_test_gt.zip',
md5='3191c34cd6ac28b60f5a7db7030190fb',
content=['annotation'],
mapping=[['ic13_textdet_test_gt', 'annotations/test']]),
]),
gatherer=dict(
type='PairGatherer',
img_suffixes=['.jpg'],
rule=[r'(\w+)\.jpg', r'gt_\1.txt']),
parser=dict(
type='ICDARTxtTextDetAnnParser',
remove_strs=[',', '"'],
format='x1 y1 x2 y2 trans',
separator=' ',
mode='xyxy'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
)
delete = [
'annotations', 'ic13_textdet_train_img', 'ic13_textdet_train_gt',
'ic13_textdet_test_img', 'ic13_textdet_test_gt'
]
config_generator = dict(type='TextDetConfigGenerator')
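
The ICDARTxtTextDetAnnParser settings above describe one box per line: characters in remove_strs are stripped, the line is split on separator, and the tokens bind to 'x1 y1 x2 y2 trans', with mode='xyxy' meaning two corner points rather than width/height. A sketch on a hypothetical gt line:

line = '158 128 411 181 "Footpath"'
for ch in [',', '"']:  # remove_strs
    line = line.replace(ch, '')
x1, y1, x2, y2, trans = line.split(' ')  # format='x1 y1 x2 y2 trans'
bbox = [int(x1), int(y1), int(x2), int(y2)]  # xyxy: top-left, bottom-right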


@@ -8,87 +8,118 @@
data_root = 'data/icdar2013'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Training_Task3_Images_GT.zip',
save_name='ic13_textrecog_train_img_gt.zip',
md5='6f0dbc823645968030878df7543f40a4',
split=['train'],
content=['image'],
mapping=[[
'ic13_textrecog_train_img_gt/gt.txt', 'annotations/train.txt'
], ['ic13_textrecog_train_img_gt', 'textrecog_imgs/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Test_Task3_Images.zip',
save_name='ic13_textrecog_test_img.zip',
md5='3206778eebb3a5c5cc15c249010bf77f',
split=['test'],
content=['image'],
mapping=[['ic13_textrecog_test_img', 'textrecog_imgs/test']]),
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2013/train_labels.json',
save_name='ic13_train_labels.json',
md5='008fcd0056e72c4cf3064fb4d1fce81b',
split=['train'],
content=['annotation'],
mapping=[['ic13_train_labels.json', 'textrecog_train.json']]),
# Note that we offer two versions of test set annotations as follows.
# Please choose one of them to download and comment the other. By
# default, we use the second one.
# 1. The original official annotation, which contains 1095 test
# samples.
# dict(
# url='https://rrc.cvc.uab.es/downloads/'
# 'Challenge2_Test_Task3_GT.txt',
# save_name='ic13_textrecog_test_gt.txt',
# md5='2634060ed8fe6e7a4a9b8d68785835a1',
# split=['test'],
# content=['annotation'],
# mapping=[['ic13_textrecog_test_gt.txt', 'annotations/test.txt']]), # noqa
# 2. The widely-used version for academic purpose, which filters out
# words with non-alphanumeric characters. This version contains 1015
# test samples.
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2013/textrecog_test_1015.json',
save_name='textrecog_test.json',
md5='68fdd818f63df8b93dc952478952009a',
split=['test'],
content=['annotation'],
),
# 3. The 857 version further pruned words shorter than 3 characters.
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2013/textrecog_test_857.json',
save_name='textrecog_test_857.json',
md5='3bed3985b0c51a989ad4006f6de8352b',
split=['test'],
content=['annotation'],
),
])
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Training_Task3_Images_GT.zip',
save_name='ic13_textrecog_train_img_gt.zip',
md5='6f0dbc823645968030878df7543f40a4',
content=['image'],
mapping=[
# ['ic13_textrecog_train_img_gt/gt.txt',
# 'annotations/train.txt'],
['ic13_textrecog_train_img_gt', 'textrecog_imgs/train']
]),
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2013/train_labels.json',
save_name='ic13_train_labels.json',
md5='008fcd0056e72c4cf3064fb4d1fce81b',
content=['annotation'],
mapping=[['ic13_train_labels.json', 'textrecog_train.json']]),
]))
# Uncomment the data converter if you want to use the original 1095 version.
# data_converter = dict(
# type='TextRecogDataConverter',
# splits=['train', 'test'],
# data_root=data_root,
# gatherer=dict(
# type='mono_gather', train_ann='train.txt', test_ann='test.txt'),
# Note that we offer two versions of test set annotations as follows. Please
# choose one of them to download and comment the other. By default, we use the
# second one.
# 1. The original official annotation, which contains 1095 test
# samples.
# Uncomment the test_preparer if you want to use the original 1095 version.
# test_preparer = dict(
# obtainer=dict(
# type='NaiveDataObtainer',
# cache_path=cache_path,
# files=[
# dict(
# url='https://rrc.cvc.uab.es/downloads/'
# 'Challenge2_Test_Task3_Images.zip',
# save_name='ic13_textrecog_test_img.zip',
# md5='3206778eebb3a5c5cc15c249010bf77f',
# split=['test'],
# content=['image'],
# mapping=[['ic13_textrecog_test_img',
# 'textrecog_imgs/test']]),
# dict(
# url='https://rrc.cvc.uab.es/downloads/'
# 'Challenge2_Test_Task3_GT.txt',
# save_name='ic13_textrecog_test_gt.txt',
# md5='2634060ed8fe6e7a4a9b8d68785835a1',
# split=['test'],
# content=['annotation'],
# mapping=[[
# 'ic13_textrecog_test_gt.txt', 'annotations/test.txt'
# ]]), # noqa
# # The 857 version further pruned words shorter than 3 characters.
# dict(
# url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
# 'icdar_2013/textrecog_test_857.json',
# save_name='textrecog_test_857.json',
# md5='3bed3985b0c51a989ad4006f6de8352b',
# split=['test'],
# content=['annotation'],
# ),
# ]),
# gatherer=dict(type='MonoGatherer', ann_name='test.txt'),
# parser=dict(
# type='ICDARTxtTextRecogAnnParser', separator=', ',
# format='img, text'), # noqa
# dumper=dict(type='JsonDumper'))
# packer=dict(type='TextRecogPacker'),
# dumper=dict(type='JsonDumper'),
# )
# 2. The widely-used version for academic purpose, which filters
# out words with non-alphanumeric characters. This version contains
# 1015 test samples.
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge2_Test_Task3_Images.zip',
save_name='ic13_textrecog_test_img.zip',
md5='3206778eebb3a5c5cc15c249010bf77f',
split=['test'],
content=['image'],
mapping=[['ic13_textrecog_test_img', 'textrecog_imgs/test']]),
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2013/textrecog_test_1015.json',
save_name='textrecog_test.json',
md5='68fdd818f63df8b93dc952478952009a',
split=['test'],
content=['annotation'],
),
# The 857 version further pruned words shorter than 3 characters.
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2013/textrecog_test_857.json',
save_name='textrecog_test_857.json',
md5='3bed3985b0c51a989ad4006f6de8352b',
split=['test'],
content=['annotation'],
),
]))
config_generator = dict(
type='TextRecogConfigGenerator',
data_root=data_root,
test_anns=[
dict(ann_file='textrecog_test.json'),
dict(dataset_postfix='857', ann_file='textrecog_test_857.json')


@@ -1,5 +1,8 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,53 +1,60 @@
data_root = 'data/icdar2015'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/ch4_training_images.zip',
save_name='ic15_textdet_train_img.zip',
md5='c51cbace155dcc4d98c8dd19d378f30d',
split=['train'],
content=['image'],
mapping=[['ic15_textdet_train_img', 'textdet_imgs/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/ch4_test_images.zip',
save_name='ic15_textdet_test_img.zip',
md5='97e4c1ddcf074ffcc75feff2b63c35dd',
split=['test'],
content=['image'],
mapping=[['ic15_textdet_test_img', 'textdet_imgs/test']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'ch4_training_localization_transcription_gt.zip',
save_name='ic15_textdet_train_gt.zip',
md5='3bfaf1988960909014f7987d2343060b',
split=['train'],
content=['annotation'],
mapping=[['ic15_textdet_train_gt', 'annotations/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge4_Test_Task4_GT.zip',
save_name='ic15_textdet_test_gt.zip',
md5='8bce173b06d164b98c357b0eb96ef430',
split=['test'],
content=['annotation'],
mapping=[['ic15_textdet_test_gt', 'annotations/test']]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'test'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/ch4_training_images.zip',
save_name='ic15_textdet_train_img.zip',
md5='c51cbace155dcc4d98c8dd19d378f30d',
content=['image'],
mapping=[['ic15_textdet_train_img', 'textdet_imgs/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'ch4_training_localization_transcription_gt.zip',
save_name='ic15_textdet_train_gt.zip',
md5='3bfaf1988960909014f7987d2343060b',
content=['annotation'],
mapping=[['ic15_textdet_train_gt', 'annotations/train']]),
]),
gatherer=dict(
type='pair_gather',
suffixes=['.jpg', '.JPG'],
type='PairGatherer',
img_suffixes=['.jpg', '.JPG'],
rule=[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt']),
parser=dict(type='ICDARTxtTextDetAnnParser', encoding='utf-8-sig'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
delete=['annotations', 'ic15_textdet_test_img', 'ic15_textdet_train_img'])
)
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/ch4_test_images.zip',
save_name='ic15_textdet_test_img.zip',
md5='97e4c1ddcf074ffcc75feff2b63c35dd',
content=['image'],
mapping=[['ic15_textdet_test_img', 'textdet_imgs/test']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge4_Test_Task4_GT.zip',
save_name='ic15_textdet_test_gt.zip',
md5='8bce173b06d164b98c357b0eb96ef430',
content=['annotation'],
mapping=[['ic15_textdet_test_gt', 'annotations/test']]),
]),
gatherer=dict(
type='PairGatherer',
img_suffixes=['.jpg', '.JPG'],
rule=[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt']),
parser=dict(type='ICDARTxtTextDetAnnParser', encoding='utf-8-sig'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
)
config_generator = dict(type='TextDetConfigGenerator')
delete = ['annotations', 'ic15_textdet_test_img', 'ic15_textdet_train_img']


@@ -4,61 +4,66 @@
data_root = 'data/icdar2015'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'ch4_training_word_images_gt.zip',
save_name='ic15_textrecog_train_img_gt.zip',
md5='600caf8c6a64a3dcf638839820edcca9',
split=['train'],
content=['image', 'annotation'],
mapping=[[
'ic15_textrecog_train_img_gt/gt.txt', 'annotations/train.txt'
], ['ic15_textrecog_train_img_gt', 'textrecog_imgs/train']]),
dict(
url='https://rrc.cvc.uab.es/downloads/ch4_test_word_images_gt.zip',
save_name='ic15_textrecog_test_img.zip',
md5='d7a71585f4cc69f89edbe534e7706d5d',
split=['test'],
content=['image'],
mapping=[['ic15_textrecog_test_img', 'textrecog_imgs/test']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge4_Test_Task3_GT.txt',
save_name='ic15_textrecog_test_gt.txt',
md5='d7a71585f4cc69f89edbe534e7706d5d',
split=['test'],
content=['annotation'],
mapping=[['ic15_textrecog_test_gt.txt', 'annotations/test.txt']]),
# 3. The 1811 version discards non-alphanumeric character images and
# some extremely rotated, perspective-shifted, and curved images for
# evaluation
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2015/textrecog_test_1811.json',
save_name='textrecog_test_1811.json',
md5='8d218ef1c37540ea959e22eeabc79ae4',
split=['test'],
content=['annotation'],
),
])
data_converter = dict(
type='TextRecogDataConverter',
splits=['train', 'test'],
data_root=data_root,
gatherer=dict(
type='mono_gather', train_ann='train.txt', test_ann='test.txt'),
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'ch4_training_word_images_gt.zip',
save_name='ic15_textrecog_train_img_gt.zip',
md5='600caf8c6a64a3dcf638839820edcca9',
content=['image', 'annotation'],
mapping=[[
'ic15_textrecog_train_img_gt/gt.txt',
'annotations/train.txt'
], ['ic15_textrecog_train_img_gt', 'textrecog_imgs/train']]),
]),
gatherer=dict(type='MonoGatherer', ann_name='train.txt'),
parser=dict(type='ICDARTxtTextRecogAnnParser', encoding='utf-8-sig'),
packer=dict(type='TextRecogPacker'),
dumper=dict(type='JsonDumper'))
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://rrc.cvc.uab.es/downloads/'
'ch4_test_word_images_gt.zip',
save_name='ic15_textrecog_test_img.zip',
md5='d7a71585f4cc69f89edbe534e7706d5d',
content=['image'],
mapping=[['ic15_textrecog_test_img', 'textrecog_imgs/test']]),
dict(
url='https://rrc.cvc.uab.es/downloads/'
'Challenge4_Test_Task3_GT.txt',
save_name='ic15_textrecog_test_gt.txt',
md5='d7a71585f4cc69f89edbe534e7706d5d',
content=['annotation'],
mapping=[[
'ic15_textrecog_test_gt.txt', 'annotations/test.txt'
]]),
# 3. The 1811 version discards non-alphanumeric character images
# and some extremely rotated, perspective-shifted, and curved
# images for evaluation
dict(
url='https://download.openmmlab.com/mmocr/data/1.x/recog/'
'icdar_2015/textrecog_test_1811.json',
save_name='textrecog_test_1811.json',
md5='8d218ef1c37540ea959e22eeabc79ae4',
content=['annotation'],
),
]),
gatherer=dict(type='MonoGatherer', ann_name='test.txt'),
parser=dict(type='ICDARTxtTextRecogAnnParser', encoding='utf-8-sig'),
packer=dict(type='TextRecogPacker'),
dumper=dict(type='JsonDumper'))
delete = ['annotations']
config_generator = dict(
type='TextRecogConfigGenerator',
data_root=data_root,
test_anns=[
dict(ann_file='textrecog_test.json'),
dict(dataset_postfix='1811', ann_file='textrecog_test_1811.json')


@@ -1,5 +1,7 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,50 +1,64 @@
data_root = 'data/iiit5k'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/'
'IIIT5K-Word_V3.0.tar.gz',
save_name='IIIT5K.tar.gz',
md5='56781bc327d22066aa1c239ee788fd46',
split=['test', 'train'],
content=['image'],
mapping=[['IIIT5K/IIIT5K/test', 'textrecog_imgs/test'],
['IIIT5K/IIIT5K/train', 'textrecog_imgs/train']]),
dict(
url='https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/'
'test_label.txt',
save_name='iiit5k_test.txt',
md5='82ecfa34a28d59284d1914dc906f5380',
split=['test'],
content=['annotation'],
mapping=[['iiit5k_test.txt', 'annotations/test.txt']]),
dict(
url='https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/'
'train_label.txt',
save_name='iiit5k_train.txt',
md5='f4731ce1eadc259532c2834266e5126d',
split=['train'],
content=['annotation'],
mapping=[['iiit5k_train.txt', 'annotations/train.txt']]),
])
data_converter = dict(
type='TextRecogDataConverter',
splits=['train', 'test'],
data_root=data_root,
gatherer=dict(
type='mono_gather', train_ann='train.txt', test_ann='test.txt'),
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/'
'IIIT5K-Word_V3.0.tar.gz',
save_name='IIIT5K.tar.gz',
md5='56781bc327d22066aa1c239ee788fd46',
content=['image'],
mapping=[['IIIT5K/IIIT5K/train', 'textrecog_imgs/train']]),
dict(
url='https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/'
'train_label.txt',
save_name='iiit5k_train.txt',
md5='f4731ce1eadc259532c2834266e5126d',
content=['annotation'],
mapping=[['iiit5k_train.txt', 'annotations/train.txt']])
]),
gatherer=dict(type='MonoGatherer', ann_name='train.txt'),
parser=dict(
type='ICDARTxtTextRecogAnnParser',
encoding='utf-8',
separator=' ',
format='img text'),
packer=dict(type='TextRecogPacker'),
dumper=dict(type='JsonDumper'),
delete=['annotations', 'IIIT5K'])
)
config_generator = dict(type='TextRecogConfigGenerator', data_root=data_root)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/'
'IIIT5K-Word_V3.0.tar.gz',
save_name='IIIT5K.tar.gz',
md5='56781bc327d22066aa1c239ee788fd46',
content=['image'],
mapping=[['IIIT5K/IIIT5K/test', 'textrecog_imgs/test']]),
dict(
url='https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/'
'test_label.txt',
save_name='iiit5k_test.txt',
md5='82ecfa34a28d59284d1914dc906f5380',
content=['annotation'],
mapping=[['iiit5k_test.txt', 'annotations/test.txt']])
]),
gatherer=dict(type='MonoGatherer', ann_name='test.txt'),
parser=dict(
type='ICDARTxtTextRecogAnnParser',
encoding='utf-8',
separator=' ',
format='img text'),
packer=dict(type='TextRecogPacker'),
dumper=dict(type='JsonDumper'),
)
delete = ['annotations', 'IIIT5K']
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,17 +1,15 @@
data_root = 'data/naf'
cache_path = 'data/cache'
data_obtainer = dict(
obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://github.com/herobd/NAF_dataset/releases/'
'download/v1.0/labeled_images.tar.gz',
save_name='naf_image.tar.gz',
md5='6521cdc25c313a1f2928a16a77ad8f29',
split=['train', 'test', 'val'],
content=['image'],
mapping=[['naf_image/labeled_images', 'temp_images/']]),
dict(
@@ -19,7 +17,6 @@ data_obtainer = dict(
'refs/heads/master.zip',
save_name='naf_anno.zip',
md5='abf5af6266cc527d772231751bc884b3',
split=['train', 'test', 'val'],
content=['annotation'],
mapping=[
[
@@ -33,17 +30,21 @@ data_obtainer = dict(
]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'test', 'val'],
data_root=data_root,
gatherer=dict(type='naf_gather'),
parser=dict(type='NAFAnnParser', data_root=data_root, det=True),
delete=['temp_images', 'data_split.json', 'annotations', 'naf_anno'],
train_preparer = dict(
obtainer=obtainer,
gatherer=dict(type='NAFGatherer'),
parser=dict(type='NAFAnnParser', det=True),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
nproc=1)
)
test_preparer = train_preparer
val_preparer = train_preparer
delete = [
'temp_images', 'data_split.json', 'annotations', 'naf_anno', 'naf_image'
]
config_generator = dict(
type='TextDetConfigGenerator',
data_root=data_root,
val_anns=[dict(ann_file='textdet_val.json', dataset_postfix='')])
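
Here one preparer object serves all three splits (test_preparer = train_preparer), with the NAFGatherer presumably routing images into splits via the data_split.json listed in delete. A hypothetical sketch of such a split lookup; the file schema is an assumption, not taken from this diff:

import json

def load_split(split_file, split):
    # Assumed schema: {'train': [...], 'test': [...], 'val': [...]}
    with open(split_file) as f:
        return set(json.load(f)[split])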


@@ -4,16 +4,15 @@
# not to use them for recognition and text spotting.
_base_ = ['textdet.py']
data_root = 'data/naf'
data_converter = dict(
type='TextRecogCropConverter',
parser=dict(
type='NAFAnnParser', data_root=data_root, ignore=['¿', '§'],
det=False),
delete=['temp_images', 'naf_anno', 'data_split.json', 'annotations'])
_base_.train_preparer.parser.update(dict(ignore=['¿', '§'], det=False))
_base_.test_preparer.parser.update(dict(ignore=['¿', '§'], det=False))
_base_.val_preparer.parser.update(dict(ignore=['¿', '§'], det=False))
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.test_preparer.packer.type = 'TextRecogCropPacker'
_base_.val_preparer.packer.type = 'TextRecogCropPacker'
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.val_preparer.gatherer.img_dir = 'textdet_imgs/val'
config_generator = dict(
type='TextRecogConfigGenerator',
data_root=data_root,
val_anns=[dict(ann_file='textrecog_val.json', dataset_postfix='')])


@@ -4,15 +4,16 @@
# not to use them for recognition and text spotting.
_base_ = ['textdet.py']
data_root = 'data/naf'
data_converter = dict(
type='TextSpottingDataConverter',
parser=dict(
type='NAFAnnParser', data_root=data_root, ignore=['¿', '§'],
det=False),
delete=['temp_images', 'naf_anno', 'data_split.json', 'annotations'])
_base_.train_preparer.parser.update(dict(ignore=['¿', '§'], det=False))
_base_.test_preparer.parser.update(dict(ignore=['¿', '§'], det=False))
_base_.val_preparer.parser.update(dict(ignore=['¿', '§'], det=False))
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
_base_.val_preparer.packer.type = 'TextSpottingPacker'
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.val_preparer.gatherer.img_dir = 'textdet_imgs/val'
config_generator = dict(
type='TextSpottingConfigGenerator',
data_root=data_root,
val_anns=[dict(ann_file='textspotting_val.json', dataset_postfix='')])


@@ -1,55 +1,64 @@
data_root = 'data/sroie'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/'
'sroie/0325updated.task1train(626p).zip',
save_name='0325updated.task1train(626p).zip',
md5='16137490f6865caac75772b9111d348c',
split=['train'],
content=['image', 'annotation'],
mapping=[[
'0325updated/0325updated.task1train(626p)/*.jpg',
'textdet_imgs/train'
],
[
'0325updated/0325updated.task1train(626p)/*.txt',
'annotations/train'
]]),
dict(
url='https://download.openmmlab.com/mmocr/data/'
'sroie/task1&2_test(361p).zip',
save_name='task1&2_test(361p).zip',
md5='1bde54705db0995c57a6e34cce437fea',
split=['test'],
content=['image'],
mapping=[[
'task1&2_test(361p)/fulltext_test(361p)', 'textdet_imgs/test'
]]),
dict(
url='https://download.openmmlab.com/mmocr/data/sroie/text.zip',
save_name='text.zip',
md5='8c534653f252ff4d3943fa27a956a74b',
split=['test'],
content=['annotation'],
mapping=[['text', 'annotations/test']]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'test'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/'
'sroie/0325updated.task1train(626p).zip',
save_name='0325updated.task1train(626p).zip',
md5='16137490f6865caac75772b9111d348c',
content=['image', 'annotation'],
mapping=[[
'0325updated/0325updated.task1train(626p)/*.jpg',
'textdet_imgs/train'
],
[
'0325updated/0325updated.task1train(626p)/*.txt',
'annotations/train'
]])
]),
gatherer=dict(
type='pair_gather',
suffixes=['.jpg'],
type='PairGatherer',
img_suffixes=['.jpg'],
rule=[r'X(\d+)\.([jJ][pP][gG])', r'X\1.txt']),
parser=dict(type='SROIETextDetAnnParser', encoding='utf-8-sig'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
delete=['text', 'task1&2_test(361p)', '0325updated', 'annotations'])
)
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/'
'sroie/task1&2_test(361p).zip',
save_name='task1&2_test(361p).zip',
md5='1bde54705db0995c57a6e34cce437fea',
content=['image'],
mapping=[[
'task1&2_test(361p)/fulltext_test(361p)',
'textdet_imgs/test'
]]),
dict(
url='https://download.openmmlab.com/mmocr/data/sroie/text.zip',
save_name='text.zip',
md5='8c534653f252ff4d3943fa27a956a74b',
content=['annotation'],
mapping=[['text', 'annotations/test']]),
]),
gatherer=dict(
type='PairGatherer',
img_suffixes=['.jpg'],
rule=[r'X(\d+)\.([jJ][pP][gG])', r'X\1.txt']),
parser=dict(type='SROIETextDetAnnParser', encoding='utf-8-sig'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
)
delete = ['text', 'task1&2_test(361p)', '0325updated', 'annotations']
config_generator = dict(type='TextDetConfigGenerator')


@@ -1,5 +1,8 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextRecogCropConverter')
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.test_preparer.packer.type = 'TextRecogCropPacker'
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,5 +1,8 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,30 +1,44 @@
data_root = 'data/svt'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='http://www.iapr-tc11.org/dataset/SVT/svt.zip',
save_name='svt.zip',
md5='42d19160010d990ae6223b14f45eff88',
split=['train', 'test'],
content=['image', 'annotations'],
mapping=[['svt/svt1/train.xml', 'annotations/train.xml'],
['svt/svt1/test.xml', 'annotations/test.xml'],
['svt/svt1/img', 'textdet_imgs/img']]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'test'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='http://www.iapr-tc11.org/dataset/SVT/svt.zip',
save_name='svt.zip',
md5='42d19160010d990ae6223b14f45eff88',
content=['image', 'annotations'],
mapping=[['svt/svt1/train.xml', 'annotations/train.xml'],
['svt/svt1/img', 'textdet_imgs/img']]),
]),
gatherer=dict(
type='mono_gather', train_ann='train.xml', test_ann='test.xml'),
parser=dict(type='SVTTextDetAnnParser', data_root=data_root),
type='MonoGatherer', ann_name='train.xml', img_dir='textdet_imgs/img'),
parser=dict(type='SVTTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
delete=['annotations', 'svt'])
)
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='http://www.iapr-tc11.org/dataset/SVT/svt.zip',
save_name='svt.zip',
md5='42d19160010d990ae6223b14f45eff88',
content=['image', 'annotations'],
mapping=[['svt/svt1/test.xml', 'annotations/test.xml'],
['svt/svt1/img', 'textdet_imgs/img']]),
]),
gatherer=dict(
type='MonoGatherer', ann_name='test.xml', img_dir='textdet_imgs/img'),
parser=dict(type='SVTTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
)
delete = ['annotations', 'svt']
config_generator = dict(type='TextDetConfigGenerator')


@@ -1,5 +1,6 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextRecogCropConverter')
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.test_preparer.packer.type = 'TextRecogCropPacker'
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,5 +1,6 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,29 +1,23 @@
data_root = 'data/svtp'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/svtp.zip',
save_name='svtp.zip',
md5='4232b46c81ba99eea6d057dcb06b8f75',
split=['test'],
content=['image', 'annotation'],
mapping=[['svtp/par1', 'textrecog_imgs/test'],
['svtp/gt.txt', 'annotations/test.txt']]),
])
data_converter = dict(
type='TextRecogDataConverter',
splits=['test'],
data_root=data_root,
gatherer=dict(type='mono_gather', test_ann='test.txt'),
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/svtp.zip',
save_name='svtp.zip',
md5='4232b46c81ba99eea6d057dcb06b8f75',
content=['image', 'annotation'],
mapping=[['svtp/par1', 'textrecog_imgs/test'],
['svtp/gt.txt', 'annotations/test.txt']]),
]),
gatherer=dict(type='MonoGatherer', ann_name='test.txt'),
parser=dict(
type='ICDARTxtTextRecogAnnParser', separator=' ', format='img text'),
dumper=dict(type='JsonDumper'),
delete=['svtp', 'annotations'])
config_generator = dict(type='TextRecogConfigGenerator', data_root=data_root)
packer=dict(type='TextRecogPacker'),
dumper=dict(type='JsonDumper'))
delete = ['svtp', 'annotations']
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,52 +1,67 @@
data_root = 'data/textocr'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://dl.fbaipublicfiles.com/textvqa/images/'
'train_val_images.zip',
save_name='textocr_textdet_train_val_img.zip',
md5='d12dd8098899044e4ae1af34db7ecfef',
split=['train', 'val'],
content=['image'],
mapping=[[
'textocr_textdet_train_val_img/train_images',
'textdet_imgs/train'
]]),
dict(
url='https://dl.fbaipublicfiles.com/textvqa/data/textocr/'
'TextOCR_0.1_train.json',
save_name='textocr_textdet_train.json',
md5='0f8ba1beefd2ca4d08a4f82bcbe6cfb4',
split=['train'],
content=['annotation'],
mapping=[['textocr_textdet_train.json',
'annotations/train.json']]),
dict(
url='https://dl.fbaipublicfiles.com/textvqa/data/textocr/'
'TextOCR_0.1_val.json',
save_name='textocr_textdet_val.json',
md5='fb151383ea7b3c530cde9ef0d5c08347',
split=['val'],
content=['annotation'],
mapping=[['textocr_textdet_val.json', 'annotations/val.json']]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'val'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://dl.fbaipublicfiles.com/textvqa/images/'
'train_val_images.zip',
save_name='textocr_textdet_img.zip',
md5='d12dd8098899044e4ae1af34db7ecfef',
content=['image'],
mapping=[[
'textocr_textdet_img/train_images', 'textdet_imgs/images'
]]),
dict(
url='https://dl.fbaipublicfiles.com/textvqa/data/textocr/'
'TextOCR_0.1_train.json',
save_name='textocr_textdet_train.json',
md5='0f8ba1beefd2ca4d08a4f82bcbe6cfb4',
content=['annotation'],
mapping=[[
'textocr_textdet_train.json', 'annotations/train.json'
]]),
]),
gatherer=dict(
type='mono_gather', train_ann='train.json', val_ann='val.json'),
parser=dict(
type='COCOTextDetAnnParser',
variant='textocr',
data_root=data_root + '/textdet_imgs/'),
dumper=dict(type='JsonDumper'),
delete=['annotations', 'textocr_textdet_train_val_img'])
type='MonoGatherer',
ann_name='train.json',
img_dir='textdet_imgs/images'),
parser=dict(type='COCOTextDetAnnParser', variant='textocr'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'))
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
val_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://dl.fbaipublicfiles.com/textvqa/images/'
'train_val_images.zip',
save_name='textocr_textdet_img.zip',
md5='d12dd8098899044e4ae1af34db7ecfef',
content=['image'],
mapping=[[
'textocr_textdet_img/train_images', 'textdet_imgs/images'
]]),
dict(
url='https://dl.fbaipublicfiles.com/textvqa/data/textocr/'
'TextOCR_0.1_val.json',
save_name='textocr_textdet_val.json',
md5='fb151383ea7b3c530cde9ef0d5c08347',
content=['annotation'],
mapping=[['textocr_textdet_val.json',
'annotations/val.json']]),
]),
gatherer=dict(
type='MonoGatherer',
ann_name='val.json',
img_dir='textdet_imgs/images'),
parser=dict(type='COCOTextDetAnnParser', variant='textocr'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'))
delete = ['annotations', 'textocr_textdet_img']
config_generator = dict(type='TextDetConfigGenerator')
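
Both preparers reference the same train_val_images.zip; since NaiveDataObtainer caches downloads under cache_path keyed by save_name and md5, the archive should only be fetched once. A sketch of that cache check, with the implementation details assumed:

import hashlib
import os.path as osp

def is_cached(cache_path, save_name, md5):
    path = osp.join(cache_path, save_name)
    if not osp.exists(path):
        return False
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest() == md5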


@@ -1,5 +1,6 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextRecogCropConverter')
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.val_preparer.packer.type = 'TextRecogCropPacker'
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,5 +1,6 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.val_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,41 +1,62 @@
data_root = 'data/totaltext'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://universityofadelaide.box.com/shared/static/'
'8xro7hnvb0sqw5e5rxm73tryc59j6s43.zip',
save_name='totaltext.zip',
md5='5b56d71a4005a333cf200ff35ce87f75',
split=['train', 'test'],
content=['image'],
mapping=[['totaltext/Images/Train', 'textdet_imgs/train'],
['totaltext/Images/Test', 'textdet_imgs/test']]),
dict(
url='https://universityofadelaide.box.com/shared/static/'
'2vmpvjb48pcrszeegx2eznzc4izan4zf.zip',
save_name='txt_format.zip',
md5='53377a83420b4a0244304467512134e8',
split=['train', 'test'],
content=['annotation'],
mapping=[['txt_format/Train', 'annotations/train'],
['txt_format/Test', 'annotations/test']]),
])
data_converter = dict(
type='TextDetDataConverter',
splits=['train', 'test'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://universityofadelaide.box.com/shared/static/'
'8xro7hnvb0sqw5e5rxm73tryc59j6s43.zip',
save_name='totaltext.zip',
md5='5b56d71a4005a333cf200ff35ce87f75',
content=['image'],
mapping=[['totaltext/Images/Train', 'textdet_imgs/train']]),
dict(
url='https://universityofadelaide.box.com/shared/static/'
'2vmpvjb48pcrszeegx2eznzc4izan4zf.zip',
save_name='txt_format.zip',
md5='53377a83420b4a0244304467512134e8',
content=['annotation'],
mapping=[['txt_format/Train', 'annotations/train']]),
]),
gatherer=dict(
type='pair_gather',
suffixes=['.jpg', '.JPG'],
type='PairGatherer',
img_suffixes=['.jpg', '.JPG'],
rule=[r'img(\d+)\.([jJ][pP][gG])', r'poly_gt_img\1.txt']),
parser=dict(type='TotaltextTextDetAnnParser', data_root=data_root),
parser=dict(type='TotaltextTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
delete=['totaltext', 'txt_format', 'annotations'])
)
config_generator = dict(type='TextDetConfigGenerator', data_root=data_root)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://universityofadelaide.box.com/shared/static/'
'8xro7hnvb0sqw5e5rxm73tryc59j6s43.zip',
save_name='totaltext.zip',
md5='5b56d71a4005a333cf200ff35ce87f75',
content=['image'],
mapping=[['totaltext/Images/Test', 'textdet_imgs/test']]),
dict(
url='https://universityofadelaide.box.com/shared/static/'
'2vmpvjb48pcrszeegx2eznzc4izan4zf.zip',
save_name='txt_format.zip',
md5='53377a83420b4a0244304467512134e8',
content=['annotation'],
mapping=[['txt_format/Test', 'annotations/test']]),
]),
gatherer=dict(
type='PairGatherer',
img_suffixes=['.jpg', '.JPG'],
rule=[r'img(\d+)\.([jJ][pP][gG])', r'poly_gt_img\1.txt']),
parser=dict(type='TotaltextTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper'),
)
delete = ['totaltext', 'txt_format', 'annotations']
config_generator = dict(type='TextDetConfigGenerator')


@@ -1,5 +1,8 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextRecogCropConverter')
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.train_preparer.packer.type = 'TextRecogCropPacker'
_base_.test_preparer.packer.type = 'TextRecogCropPacker'
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,5 +1,8 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.gatherer.img_dir = 'textdet_imgs/train'
_base_.test_preparer.gatherer.img_dir = 'textdet_imgs/test'
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,35 +1,71 @@
data_root = 'data/wildreceipt'
cache_path = 'data/cache'
data_obtainer = dict(
type='NaiveDataObtainer',
cache_path=cache_path,
data_root=data_root,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/wildreceipt.tar',
save_name='wildreceipt.tar',
md5='2a2c4a1b4777fb4fe185011e17ad46ae',
split=['train', 'test'],
content=['image', 'annotation'],
mapping=[
['wildreceipt/wildreceipt/class_list.txt', 'class_list.txt'],
['wildreceipt/wildreceipt/dict.txt', 'dict.txt'],
['wildreceipt/wildreceipt/test.txt', 'test.txt'],
['wildreceipt/wildreceipt/train.txt', 'train.txt'],
['wildreceipt/wildreceipt/image_files', 'image_files'],
]),
])
data_converter = dict(
type='WildReceiptConverter',
splits=['train', 'test'],
data_root=data_root,
train_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/'
'wildreceipt.tar',
save_name='wildreceipt.tar',
md5='2a2c4a1b4777fb4fe185011e17ad46ae',
content=['image', 'annotation'],
mapping=[
[
'wildreceipt/wildreceipt/class_list.txt',
'class_list.txt'
],
['wildreceipt/wildreceipt/dict.txt', 'dict.txt'],
[
'wildreceipt/wildreceipt/train.txt',
'annotations/train.txt'
],
[
'wildreceipt/wildreceipt/image_files/*/*/*.*',
'image_files'
],
]),
]),
gatherer=dict(
type='mono_gather',
train_ann='train.txt',
test_ann='test.txt',
ann_path=data_root),
parser=dict(type='WildreceiptKIEAnnParser', data_root=data_root),
type='MonoGatherer', ann_name='train.txt', img_dir='image_files'),
parser=dict(type='WildreceiptKIEAnnParser'),
packer=dict(type='WildReceiptPacker'),
dumper=dict(type='WildreceiptOpensetDumper'),
delete=['wildreceipt'])
)
test_preparer = dict(
obtainer=dict(
type='NaiveDataObtainer',
cache_path=cache_path,
files=[
dict(
url='https://download.openmmlab.com/mmocr/data/'
'wildreceipt.tar',
save_name='wildreceipt.tar',
md5='2a2c4a1b4777fb4fe185011e17ad46ae',
content=['image', 'annotation'],
mapping=[
[
'wildreceipt/wildreceipt/class_list.txt',
'class_list.txt'
],
['wildreceipt/wildreceipt/dict.txt', 'dict.txt'],
[
'wildreceipt/wildreceipt/test.txt',
'annotations/test.txt'
],
[
'wildreceipt/wildreceipt/image_files/*/*/*.*',
'image_files'
],
]),
]),
gatherer=dict(
type='MonoGatherer', img_dir='image_files', ann_name='test.txt'),
parser=dict(type='WildreceiptKIEAnnParser'),
packer=dict(type='WildReceiptPacker'),
dumper=dict(type='WildreceiptOpensetDumper'),
)
delete = ['wildreceipt', 'annotations']


@@ -1,9 +1,14 @@
_base_ = ['kie.py']
data_converter = dict(
type='TextDetDataConverter',
parser=dict(type='WildreceiptTextDetAnnParser'),
dumper=dict(type='JsonDumper'))
_base_.train_preparer.update(
dict(
parser=dict(type='WildreceiptTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper')))
_base_.test_preparer.update(
dict(
parser=dict(type='WildreceiptTextDetAnnParser'),
packer=dict(type='TextDetPacker'),
dumper=dict(type='JsonDumper')))
config_generator = dict(
type='TextRecogConfigGenerator', data_root=_base_.data_root)
config_generator = dict(type='TextDetConfigGenerator')


@@ -1,4 +1,15 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextRecogCropConverter')
_base_.train_preparer.update(
dict(
parser=dict(type='WildreceiptTextDetAnnParser'),
packer=dict(type='TextRecogCropPacker'),
dumper=dict(type='JsonDumper')))
_base_.test_preparer.update(
dict(
parser=dict(type='WildreceiptTextDetAnnParser'),
packer=dict(type='TextRecogCropPacker'),
dumper=dict(type='JsonDumper')))
config_generator = dict(type='TextRecogConfigGenerator')


@@ -1,5 +1,6 @@
_base_ = ['textdet.py']
data_converter = dict(type='TextSpottingDataConverter')
_base_.train_preparer.packer.type = 'TextSpottingPacker'
_base_.test_preparer.packer.type = 'TextSpottingPacker'
config_generator = dict(type='TextSpottingConfigGenerator')


@@ -1,17 +1,10 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .config_generator import (TextDetConfigGenerator,
TextRecogConfigGenerator,
TextSpottingConfigGenerator)
from .data_converter import (TextDetDataConverter, TextRecogDataConverter,
TextSpottingDataConverter, WildReceiptConverter)
from .data_obtainer import NaiveDataObtainer
from .config_generators import * # noqa
from .data_preparer import DatasetPreparer
from .dumpers import * # noqa
from .gatherers import * # noqa
from .obtainers import * # noqa
from .packers import * # noqa
from .parsers import * # noqa
__all__ = [
'DatasetPreparer', 'NaiveDataObtainer', 'TextDetDataConverter',
'TextRecogDataConverter', 'TextSpottingDataConverter',
'WildReceiptConverter', 'TextDetConfigGenerator',
'TextRecogConfigGenerator', 'TextSpottingConfigGenerator'
]
__all__ = ['DatasetPreparer']
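
With the converter monolith removed, components are exposed via wildcard imports and discovered through mmengine registries; CFG_GENERATORS appears in the generator module below, while the names of the other registries are an assumption here. A self-contained sketch of the registry pattern:

# Hypothetical sketch of registry-based component construction; the
# GATHERERS registry name is assumed, and this toy PairGatherer stands in
# for the real one.
from mmengine.registry import Registry

GATHERERS = Registry('gatherer')

@GATHERERS.register_module()
class PairGatherer:
    def __init__(self, img_suffixes=None, rule=None):
        self.img_suffixes = img_suffixes
        self.rule = rule

cfg = dict(type='PairGatherer', img_suffixes=['.png'],
           rule=[r'(\w+)\.png', r'\1.json'])
gatherer = GATHERERS.build(cfg)  # build() pops 'type' and instantiates it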


@@ -1,374 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from abc import abstractmethod
from typing import Dict, List, Optional
from mmengine import mkdir_or_exist
from .data_preparer import CFG_GENERATORS
class BaseDatasetConfigGenerator:
"""Base class for dataset config generator.
Args:
data_root (str): The root path of the dataset.
task (str): The task of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to None.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to None.
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to None.
config_path (str): Path to the configs. Defaults to 'configs/'.
"""
def __init__(
self,
data_root: str,
task: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = None,
val_anns: Optional[List[Dict]] = None,
test_anns: Optional[List[Dict]] = None,
config_path: str = 'configs/',
) -> None:
self.config_path = config_path
self.data_root = data_root
self.task = task
self.dataset_name = dataset_name
self.overwrite_cfg = overwrite_cfg
self._prepare_anns(train_anns, val_anns, test_anns)
def _prepare_anns(self, train_anns: Optional[List[Dict]],
val_anns: Optional[List[Dict]],
test_anns: Optional[List[Dict]]) -> None:
"""Preprocess input arguments and stores these information into
``self.anns``.
``self.anns`` is a dict that maps the name of a dataset config variable
to a dict, which contains the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- split (str): The split the annotation belongs to. Usually
it can be 'train', 'val' and 'test'.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
"""
self.anns = {}
for split, ann_list in zip(('train', 'val', 'test'),
(train_anns, val_anns, test_anns)):
if ann_list is None:
continue
if not isinstance(ann_list, list):
raise ValueError(f'{split}_anns must be either a list or'
' None!')
for ann_dict in ann_list:
assert 'ann_file' in ann_dict
if ann_dict.get('dataset_postfix', ''):
key = f'{self.dataset_name}_{ann_dict["dataset_postfix"]}_{self.task}_{split}' # noqa
else:
key = f'{self.dataset_name}_{self.task}_{split}'
ann_dict['split'] = split
if key in self.anns:
raise ValueError(
f'Duplicate dataset variable {key} found! '
'Please use different dataset_postfix to avoid '
'conflict.')
self.anns[key] = ann_dict
def __call__(self) -> None:
"""Generates the base dataset config."""
dataset_config = self._gen_dataset_config()
cfg_path = osp.join(self.config_path, self.task, '_base_', 'datasets',
f'{self.dataset_name}.py')
if osp.exists(cfg_path) and not self.overwrite_cfg:
print(f'{cfg_path} found, skipping.')
return
mkdir_or_exist(osp.dirname(cfg_path))
with open(cfg_path, 'w') as f:
f.write(
f'{self.dataset_name}_{self.task}_data_root = \'{self.data_root}\'\n' # noqa: E501
)
f.write(dataset_config)
@abstractmethod
def _gen_dataset_config(self) -> str:
"""Generate a full dataset config based on the annotation file
dictionary.
Returns:
str: The generated dataset config.
"""
@CFG_GENERATORS.register_module()
class TextDetConfigGenerator(BaseDatasetConfigGenerator):
"""Text detection config generator.
Args:
data_root (str): The root path of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to
``[dict(ann_file='textdet_train.json', dataset_postfix='')]``.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to [].
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to ``[dict(ann_file='textdet_test.json')]``.
config_path (str): Path to the configs. Defaults to 'configs/'.
"""
def __init__(
self,
data_root: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = [
dict(ann_file='textdet_train.json', dataset_postfix='')
],
val_anns: Optional[List[Dict]] = [],
test_anns: Optional[List[Dict]] = [
dict(ann_file='textdet_test.json', dataset_postfix='')
],
config_path: str = 'configs/',
) -> None:
super().__init__(
data_root=data_root,
task='textdet',
overwrite_cfg=overwrite_cfg,
dataset_name=dataset_name,
train_anns=train_anns,
val_anns=val_anns,
test_anns=test_anns,
config_path=config_path,
)
def _gen_dataset_config(self) -> str:
"""Generate a full dataset config based on the annotation file
dictionary.
The config is generated from ``self.anns``, a nested dictionary that
maps a config variable name (such as icdar2015_textdet_train) to its
corresponding annotation information dict. Each dict contains the
following keys:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
to None.
- split (str): The split the annotation belongs to, usually one of
'train', 'val' and 'test'.
Returns:
str: The generated dataset config.
"""
cfg = ''
for key_name, ann_dict in self.anns.items():
cfg += f'\n{key_name} = dict(\n'
cfg += ' type=\'OCRDataset\',\n'
cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501
cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n'
if ann_dict['split'] == 'train':
cfg += ' filter_cfg=dict(filter_empty_gt=True, min_size=32),\n' # noqa: E501
elif ann_dict['split'] in ['test', 'val']:
cfg += ' test_mode=True,\n'
cfg += ' pipeline=None)\n'
return cfg
@CFG_GENERATORS.register_module()
class TextRecogConfigGenerator(BaseDatasetConfigGenerator):
"""Text recognition config generator.
Args:
data_root (str): The root path of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to
``[dict(ann_file='textrecog_train.json', dataset_postfix='')]``.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to [].
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to ``[dict(ann_file='textrecog_test.json')]``.
config_path (str): Path to the configs. Defaults to 'configs/'.
Example:
It generates a dataset config like:
>>> icdar2015_textrecog_data_root = 'data/icdar2015/'
>>> icdar2015_textrecog_train = dict(
>>> type='OCRDataset',
>>> data_root=icdar2015_textrecog_data_root,
>>> ann_file='textrecog_train.json',
>>> test_mode=False,
>>> pipeline=None)
>>> icdar2015_textrecog_test = dict(
>>> type='OCRDataset',
>>> data_root=icdar2015_textrecog_data_root,
>>> ann_file='textrecog_test.json',
>>> test_mode=True,
>>> pipeline=None)
"""
def __init__(
self,
data_root: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = [
dict(ann_file='textrecog_train.json', dataset_postfix='')
],
val_anns: Optional[List[Dict]] = [],
test_anns: Optional[List[Dict]] = [
dict(ann_file='textrecog_test.json', dataset_postfix='')
],
config_path: str = 'configs/',
) -> None:
super().__init__(
data_root=data_root,
task='textrecog',
overwrite_cfg=overwrite_cfg,
dataset_name=dataset_name,
train_anns=train_anns,
val_anns=val_anns,
test_anns=test_anns,
config_path=config_path)
def _gen_dataset_config(self) -> str:
"""Generate a full dataset config based on the annotation file
dictionary.
The config is generated from ``self.anns``, a nested dictionary that
maps a config variable name (such as icdar2015_textrecog_train) to its
corresponding annotation information dict. Each dict contains the
following keys:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
to None.
- split (str): The split the annotation belongs to, usually one of
'train', 'val' and 'test'.
Returns:
str: The generated dataset config.
"""
cfg = ''
for key_name, ann_dict in self.anns.items():
cfg += f'\n{key_name} = dict(\n'
cfg += ' type=\'OCRDataset\',\n'
cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501
cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n'
if ann_dict['split'] in ['test', 'val']:
cfg += ' test_mode=True,\n'
cfg += ' pipeline=None)\n'
return cfg
@CFG_GENERATORS.register_module()
class TextSpottingConfigGenerator(TextDetConfigGenerator):
"""Text spotting config generator.
Args:
data_root (str): The root path of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to
``[dict(ann_file='textspotting_train.json', dataset_postfix='')]``.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to [].
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to ``[dict(ann_file='textspotting_test.json')]``.
config_path (str): Path to the configs. Defaults to 'configs/'.
"""
def __init__(
self,
data_root: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = [
dict(ann_file='textspotting_train.json', dataset_postfix='')
],
val_anns: Optional[List[Dict]] = [],
test_anns: Optional[List[Dict]] = [
dict(ann_file='textspotting_test.json', dataset_postfix='')
],
config_path: str = 'configs/',
) -> None:
BaseDatasetConfigGenerator.__init__(
self,
data_root=data_root,
task='textspotting',
overwrite_cfg=overwrite_cfg,
dataset_name=dataset_name,
train_anns=train_anns,
val_anns=val_anns,
test_anns=test_anns,
config_path=config_path,
)
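To make the naming rule above concrete, here is a minimal sketch of how ``dataset_postfix`` shapes the generated variable names. The import path and dataset/file names are assumptions for illustration, not part of this diff.

# A minimal sketch, assuming the refactored config_generators package is
# importable from mmocr.datasets.preparers (hypothetical usage).
from mmocr.datasets.preparers.config_generators import TextDetConfigGenerator

gen = TextDetConfigGenerator(
    data_root='data/icdar2015',
    dataset_name='icdar2015',
    train_anns=[
        dict(ann_file='textdet_train.json', dataset_postfix=''),
        dict(ann_file='textdet_train_857.json', dataset_postfix='857'),
    ])
# _prepare_anns keys: an empty postfix yields {dataset_name}_{task}_{split};
# a non-empty one yields {dataset_name}_{postfix}_{task}_{split}. The default
# test_anns contributes a third key.
print(sorted(gen.anns))
# ['icdar2015_857_textdet_train', 'icdar2015_textdet_test',
#  'icdar2015_textdet_train']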

View File

@ -0,0 +1,10 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseDatasetConfigGenerator
from .textdet_config_generator import TextDetConfigGenerator
from .textrecog_config_generator import TextRecogConfigGenerator
from .textspotting_config_generator import TextSpottingConfigGenerator
__all__ = [
'BaseDatasetConfigGenerator', 'TextDetConfigGenerator',
'TextRecogConfigGenerator', 'TextSpottingConfigGenerator'
]

View File

@ -0,0 +1,120 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from abc import abstractmethod
from typing import Dict, List, Optional
from mmengine import mkdir_or_exist
class BaseDatasetConfigGenerator:
"""Base class for dataset config generator.
Args:
data_root (str): The root path of the dataset.
task (str): The task of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to None.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to None.
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to None.
config_path (str): Path to the configs. Defaults to 'configs/'.
"""
def __init__(
self,
data_root: str,
task: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = None,
val_anns: Optional[List[Dict]] = None,
test_anns: Optional[List[Dict]] = None,
config_path: str = 'configs/',
) -> None:
self.config_path = config_path
self.data_root = data_root
self.task = task
self.dataset_name = dataset_name
self.overwrite_cfg = overwrite_cfg
self._prepare_anns(train_anns, val_anns, test_anns)
def _prepare_anns(self, train_anns: Optional[List[Dict]],
val_anns: Optional[List[Dict]],
test_anns: Optional[List[Dict]]) -> None:
"""Preprocess input arguments and stores these information into
``self.anns``.
``self.anns`` is a dict that maps the name of a dataset config variable
to a dict, which contains the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- split (str): The split the annotation belongs to, usually one of
'train', 'val' and 'test'.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
"""
self.anns = {}
for split, ann_list in zip(('train', 'val', 'test'),
(train_anns, val_anns, test_anns)):
if ann_list is None:
continue
if not isinstance(ann_list, list):
raise ValueError(f'{split}_anns must be either a list or'
' None!')
for ann_dict in ann_list:
assert 'ann_file' in ann_dict
if ann_dict.get('dataset_postfix', ''):
key = f'{self.dataset_name}_{ann_dict["dataset_postfix"]}_{self.task}_{split}' # noqa
else:
key = f'{self.dataset_name}_{self.task}_{split}'
ann_dict['split'] = split
if key in self.anns:
raise ValueError(
f'Duplicate dataset variable {key} found! '
'Please use different dataset_postfix to avoid '
'conflict.')
self.anns[key] = ann_dict
def __call__(self) -> None:
"""Generates the base dataset config."""
dataset_config = self._gen_dataset_config()
cfg_path = osp.join(self.config_path, self.task, '_base_', 'datasets',
f'{self.dataset_name}.py')
if osp.exists(cfg_path) and not self.overwrite_cfg:
print(f'{cfg_path} found, skipping.')
return
mkdir_or_exist(osp.dirname(cfg_path))
with open(cfg_path, 'w') as f:
f.write(
f'{self.dataset_name}_{self.task}_data_root = \'{self.data_root}\'\n' # noqa: E501
)
f.write(dataset_config)
@abstractmethod
def _gen_dataset_config(self) -> str:
"""Generate a full dataset config based on the annotation file
dictionary.
Returns:
str: The generated dataset config.
"""

View File

@ -0,0 +1,96 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
from ..data_preparer import CFG_GENERATORS
from .base import BaseDatasetConfigGenerator
@CFG_GENERATORS.register_module()
class TextDetConfigGenerator(BaseDatasetConfigGenerator):
"""Text detection config generator.
Args:
data_root (str): The root path of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to
``[dict(ann_file='textdet_train.json', dataset_postfix='')]``.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to [].
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to ``[dict(ann_file='textdet_test.json')]``.
config_path (str): Path to the configs. Defaults to 'configs/'.
"""
def __init__(
self,
data_root: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = [
dict(ann_file='textdet_train.json', dataset_postfix='')
],
val_anns: Optional[List[Dict]] = [],
test_anns: Optional[List[Dict]] = [
dict(ann_file='textdet_test.json', dataset_postfix='')
],
config_path: str = 'configs/',
) -> None:
super().__init__(
data_root=data_root,
task='textdet',
overwrite_cfg=overwrite_cfg,
dataset_name=dataset_name,
train_anns=train_anns,
val_anns=val_anns,
test_anns=test_anns,
config_path=config_path,
)
def _gen_dataset_config(self) -> str:
"""Generate a full dataset config based on the annotation file
dictionary.
The config is generated from ``self.anns``, a nested dictionary that
maps a config variable name (such as icdar2015_textdet_train) to its
corresponding annotation information dict. Each dict contains the
following keys:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
to None.
- split (str): The split the annotation belongs to, usually one of
'train', 'val' and 'test'.
Returns:
str: The generated dataset config.
"""
cfg = ''
for key_name, ann_dict in self.anns.items():
cfg += f'\n{key_name} = dict(\n'
cfg += ' type=\'OCRDataset\',\n'
cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501
cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n'
if ann_dict['split'] == 'train':
cfg += ' filter_cfg=dict(filter_empty_gt=True, min_size=32),\n' # noqa: E501
elif ann_dict['split'] in ['test', 'val']:
cfg += ' test_mode=True,\n'
cfg += ' pipeline=None)\n'
return cfg
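Unlike the recognition generator below, this class carries no docstring example; under the defaults, a run for a hypothetical 'icdar2015' dataset would emit a config along these lines:

icdar2015_textdet_data_root = 'data/icdar2015'

icdar2015_textdet_train = dict(
    type='OCRDataset',
    data_root=icdar2015_textdet_data_root,
    ann_file='textdet_train.json',
    filter_cfg=dict(filter_empty_gt=True, min_size=32),
    pipeline=None)

icdar2015_textdet_test = dict(
    type='OCRDataset',
    data_root=icdar2015_textdet_data_root,
    ann_file='textdet_test.json',
    test_mode=True,
    pipeline=None)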

View File

@ -0,0 +1,109 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
from ..data_preparer import CFG_GENERATORS
from .base import BaseDatasetConfigGenerator
@CFG_GENERATORS.register_module()
class TextRecogConfigGenerator(BaseDatasetConfigGenerator):
"""Text recognition config generator.
Args:
data_root (str): The root path of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to
``[dict(ann_file='textrecog_train.json', dataset_postfix='')]``.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to [].
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to ``[dict(ann_file='textrecog_test.json')]``.
config_path (str): Path to the configs. Defaults to 'configs/'.
Example:
It generates a dataset config like:
>>> icdar2015_textrecog_data_root = 'data/icdar2015/'
>>> icdar2015_textrecog_train = dict(
>>> type='OCRDataset',
>>> data_root=icdar2015_textrecog_data_root,
>>> ann_file='textrecog_train.json',
>>> test_mode=False,
>>> pipeline=None)
>>> icdar2015_textrecog_test = dict(
>>> type='OCRDataset',
>>> data_root=icdar2015_textrecog_data_root,
>>> ann_file='textrecog_test.json',
>>> test_mode=True,
>>> pipeline=None)
"""
def __init__(
self,
data_root: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = [
dict(ann_file='textrecog_train.json', dataset_postfix='')
],
val_anns: Optional[List[Dict]] = [],
test_anns: Optional[List[Dict]] = [
dict(ann_file='textrecog_test.json', dataset_postfix='')
],
config_path: str = 'configs/',
) -> None:
super().__init__(
data_root=data_root,
task='textrecog',
overwrite_cfg=overwrite_cfg,
dataset_name=dataset_name,
train_anns=train_anns,
val_anns=val_anns,
test_anns=test_anns,
config_path=config_path)
def _gen_dataset_config(self) -> str:
"""Generate a full dataset config based on the annotation file
dictionary.
The config is generated from ``self.anns``, a nested dictionary that
maps a config variable name (such as icdar2015_textrecog_train) to its
corresponding annotation information dict. Each dict contains the
following keys:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
to None.
- split (str): The split the annotation belongs to, usually one of
'train', 'val' and 'test'.
Returns:
str: The generated dataset config.
"""
cfg = ''
for key_name, ann_dict in self.anns.items():
cfg += f'\n{key_name} = dict(\n'
cfg += ' type=\'OCRDataset\',\n'
cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501
cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n'
if ann_dict['split'] in ['test', 'val']:
cfg += ' test_mode=True,\n'
cfg += ' pipeline=None)\n'
return cfg

View File

@ -0,0 +1,63 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
from ..data_preparer import CFG_GENERATORS
from .base import BaseDatasetConfigGenerator
from .textdet_config_generator import TextDetConfigGenerator
@CFG_GENERATORS.register_module()
class TextSpottingConfigGenerator(TextDetConfigGenerator):
"""Text spotting config generator.
Args:
data_root (str): The root path of the dataset.
dataset_name (str): The name of the dataset.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, config generator will not generate new
config for datasets whose configs are already in base.
train_anns (List[Dict], optional): A list of train annotation files
to appear in the base configs. Defaults to
``[dict(ann_file='textspotting_train.json', dataset_postfix='')]``.
Each element is typically a dict with the following fields:
- ann_file (str): The path to the annotation file relative to
data_root.
- dataset_postfix (str, optional): Affects the postfix of the
resulting variable in the generated config. If specified, the
dataset variable will be named in the form of
``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
None.
val_anns (List[Dict], optional): A list of val annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to [].
test_anns (List[Dict], optional): A list of test annotation files
to appear in the base configs, similar to ``train_anns``. Defaults
to ``[dict(ann_file='textspotting_test.json')]``.
config_path (str): Path to the configs. Defaults to 'configs/'.
"""
def __init__(
self,
data_root: str,
dataset_name: str,
overwrite_cfg: bool = False,
train_anns: Optional[List[Dict]] = [
dict(ann_file='textspotting_train.json', dataset_postfix='')
],
val_anns: Optional[List[Dict]] = [],
test_anns: Optional[List[Dict]] = [
dict(ann_file='textspotting_test.json', dataset_postfix='')
],
config_path: str = 'configs/',
) -> None:
BaseDatasetConfigGenerator.__init__(
self,
data_root=data_root,
task='textspotting',
overwrite_cfg=overwrite_cfg,
dataset_name=dataset_name,
train_anns=train_anns,
val_anns=val_anns,
test_anns=test_anns,
config_path=config_path,
)
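As elsewhere in the preparer, this generator is usually built from its registry entry; a minimal direct invocation might look like the following sketch. The dataset name and paths are placeholders.

from mmocr.datasets.preparers.data_preparer import \
    CFG_GENERATORS  # registry defined in data_preparer.py

cfg = dict(
    type='TextSpottingConfigGenerator',
    data_root='data/totaltext',
    dataset_name='totaltext')
generator = CFG_GENERATORS.build(cfg)
# Writes configs/textspotting/_base_/datasets/totaltext.py unless it
# already exists and overwrite_cfg is False.
generator()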

View File

@ -1,752 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os
import os.path as osp
import re
import shutil
from abc import abstractmethod
from functools import partial
from typing import Dict, List, Optional, Sequence, Tuple
import mmcv
from mmengine import mkdir_or_exist, track_parallel_progress
from mmocr.utils import bbox2poly, crop_img, list_files, poly2bbox, warp_img
from .data_preparer import DATA_CONVERTERS, DATA_DUMPERS, DATA_PARSERS
class BaseDataConverter:
"""Base class for data processor.
Args:
splits (List): A list of splits to be processed.
data_root (str): Path to the data root.
gatherer (Dict): Config dict for gathering the dataset files.
parser (Dict): Config dict for parsing the dataset files.
dumper (Dict): Config dict for dumping the dataset files.
nproc (int): Number of processes to process the data.
task (str): Task of the dataset.
dataset_name (str): Dataset name.
delete (Optional[List]): A list of files to be deleted after
conversion.
"""
def __init__(self,
splits: List,
data_root: str,
gatherer: Dict,
parser: Dict,
dumper: Dict,
nproc: int,
task: str,
dataset_name: str,
delete: Optional[List] = None,
config_path: str = 'configs/'):
assert isinstance(nproc, int) and nproc > 0, \
'nproc must be a positive integer.'
self.splits = splits
self.data_root = data_root
self.nproc = nproc
self.task = task
self.dataset_name = dataset_name
self.delete = delete
self.config_path = config_path
self.img_dir = f'{task}_imgs'
parser.update(dict(nproc=nproc))
dumper.update(dict(task=task))
self.parser = DATA_PARSERS.build(parser)
self.dumper = DATA_DUMPERS.build(dumper)
gather_type = gatherer.pop('type')
self.gatherer_args = gatherer
if gather_type == 'pair_gather':
self.gatherer = self.pair_gather
elif gather_type == 'mono_gather':
self.gatherer = self.mono_gather
elif gather_type == 'naf_gather':
self.gatherer = self.naf_gather
else:
raise NotImplementedError
def __call__(self):
"""Process the data.
Returns:
Dict: A dict that maps each split to the path of the annotation
files.
"""
# Convert and dump annotations to MMOCR format
for self.current_split in self.splits:
print(f'Parsing {self.current_split} split...')
# Gather the info such as file names required by parser
img_path = osp.join(self.data_root, self.img_dir,
self.current_split)
ann_path = osp.join(self.data_root, 'annotations')
gatherer_args = dict(img_path=img_path, ann_path=ann_path)
gatherer_args.update(self.gatherer_args)
files = self.gatherer(**gatherer_args)
# Convert dataset annotations to MMOCR format
samples = self.parser.parse_files(files, self.current_split)
print(f'Packing {self.current_split} annotations...')
func = partial(self.pack_instance, split=self.current_split)
samples = track_parallel_progress(func, samples, nproc=self.nproc)
samples = self.add_meta(samples)
# Dump annotation files
self.dumper.dump(samples, self.data_root, self.current_split)
self.clean()
@abstractmethod
def pack_instance(self, sample: Tuple, split: str) -> Dict:
"""Pack the parsed annotation info to an MMOCR format instance.
Args:
sample (Tuple): A tuple of (img_path, instances).
- img_path (str): Path to image file.
- instances (Sequence[Dict]): A list of converted annos.
split (str): The split of the instance.
Returns:
Dict: An MMOCR format instance.
"""
@abstractmethod
def add_meta(self, sample: List) -> Dict:
"""Add meta information to the sample.
Args:
sample (List): A list of samples of the dataset.
Returns:
Dict: A dict contains the meta information and samples.
"""
def mono_gather(self,
ann_path: str,
train_ann: Optional[str] = None,
val_ann: Optional[str] = None,
test_ann: Optional[str] = None,
**kwargs) -> str:
"""Gather the dataset file. Specifically for the case that only one
annotation file is needed. For example,
img_001.jpg \
img_002.jpg ---> train.json
img_003.jpg /
Args:
ann_path (str): Path to the annotations.
train_ann (str, optional): The annotation file name of the train
split in the original dataset. Defaults to None.
val_ann (str, optional): The annotation file name of the val split
in the original dataset. Defaults to None.
test_ann (str, optional): The annotation file name of the test
split in the original dataset. Defaults to None.
Returns:
str: Path to the annotation file.
"""
ann_file = eval(f'{self.current_split}_ann')
if ann_file is None:
raise ValueError(
f'{self.current_split}_ann must be specified in gatherer!')
return osp.join(ann_path, ann_file)
def pair_gather(self, img_path: str, suffixes: List, rule: Sequence,
**kwargs) -> List[Tuple]:
"""Gather the dataset files. Specifically for the paired annotations.
That is to say, each image has a corresponding annotation file. For
example,
img_1.jpg <---> gt_img_1.txt
img_2.jpg <---> gt_img_2.txt
img_3.jpg <---> gt_img_3.txt
Args:
img_path (str): Path to the images.
suffixes (List[str]): File suffixes used for searching.
rule (Sequence): The rule for pairing the files. The
first element is the matching pattern for the file, and the
second element is the replacement pattern, which should
be a regular expression. For example, to map the image
name img_1.jpg to the annotation name gt_img_1.txt,
the rule is
[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt'] # noqa: W605 E501
Returns:
List[Tuple]: A list of tuples (img_path, ann_path).
"""
files = list()
for file in list_files(img_path, suffixes):
if not re.match(rule[0], osp.basename(file)):
continue
file2 = re.sub(rule[0], rule[1], osp.basename(file))
file2 = file.replace(osp.basename(file), file2)
file2 = file2.replace(self.img_dir, 'annotations')
files.append((file, file2))
return files
def naf_gather(self, img_path: str, ann_path: str,
**kwargs) -> List[Tuple]:
"""Gather the dataset file from NAF dataset. Specifically for the case
that there is a split file that contains the names of different splits.
For example,
img_001.jpg train: img_001.jpg
img_002.jpg ---> data_split.json ---> test: img_002.jpg
img_003.jpg val: img_003.jpg
Args:
img_path (str): Path to the images.
ann_path (str): Path to the annotations.
Returns:
List[Tuple]: A list of tuples (img_path, ann_path).
"""
split_file = osp.join(self.data_root, 'data_split.json')
with open(split_file, 'r') as f:
split_data = json.load(f)
files = []
# Rename the key
split_data['val'] = split_data.pop('valid')
if not osp.exists(img_path):
os.makedirs(img_path)
for groups in split_data[self.current_split]:
for img_name in split_data[self.current_split][groups]:
src_img = osp.join(self.data_root, 'temp_images', img_name)
dst_img = osp.join(img_path, img_name)
if not osp.exists(src_img):
Warning(f'{src_img} does not exist!')
continue
# move the image to the new path
shutil.move(src_img, dst_img)
ann = osp.join(ann_path, img_name.replace('.jpg', '.json'))
files.append((dst_img, ann))
return files
def clean(self) -> None:
for d in self.delete:
delete_file = osp.join(self.data_root, d)
if osp.exists(delete_file):
if osp.isdir(delete_file):
shutil.rmtree(delete_file)
else:
os.remove(delete_file)
@DATA_CONVERTERS.register_module()
class TextDetDataConverter(BaseDataConverter):
"""Text detection data converter.
Args:
splits (List): A list of splits to be processed.
data_root (str): Path to the data root.
gatherer (Dict): Config dict for gathering the dataset files.
parser (Dict): Config dict for parsing the dataset files.
dumper (Dict): Config dict for dumping the dataset files.
dataset_name (str): Name of the dataset.
nproc (int): Number of processes to process the data.
delete (Optional[List]): A list of files to be deleted after
conversion. Defaults to ['annotations'].
"""
def __init__(self,
splits: List,
data_root: str,
gatherer: Dict,
parser: Dict,
dumper: Dict,
dataset_name: str,
nproc: int,
delete: List = ['annotations']) -> None:
super().__init__(
splits=splits,
data_root=data_root,
gatherer=gatherer,
parser=parser,
dumper=dumper,
dataset_name=dataset_name,
nproc=nproc,
delete=delete,
task='textdet')
def pack_instance(self,
sample: Tuple,
split: str,
bbox_label: int = 0) -> Dict:
"""Pack the parsed annotation info to an MMOCR format instance.
Args:
sample (Tuple): A tuple of (img_path, instances).
- img_path (str): Path to the image file.
- instances (Sequence[Dict]): A list of converted annos. Each
element should be a dict with the following keys:
- 'poly' or 'box'
- 'ignore'
- 'bbox_label' (optional)
split (str): The split of the instance.
Returns:
Dict: An MMOCR format instance.
"""
img_path, instances = sample
img = mmcv.imread(img_path)
h, w = img.shape[:2]
packed_instances = list()
for instance in instances:
poly = instance.get('poly', None)
box = instance.get('box', None)
assert box or poly
packed_sample = dict(
polygon=poly if poly else list(
bbox2poly(box).astype('float64')),
bbox=box if box else list(poly2bbox(poly).astype('float64')),
bbox_label=bbox_label,
ignore=instance['ignore'])
packed_instances.append(packed_sample)
packed_instances = dict(
instances=packed_instances,
img_path=img_path.replace(self.data_root + '/', ''),
height=h,
width=w)
return packed_instances
def add_meta(self, sample: List) -> Dict:
meta = {
'metainfo': {
'dataset_type': 'TextDetDataset',
'task_name': 'textdet',
'category': [{
'id': 0,
'name': 'text'
}]
},
'data_list': sample
}
return meta
@DATA_CONVERTERS.register_module()
class TextSpottingDataConverter(BaseDataConverter):
"""Text spotting data converter.
Args:
splits (List): A list of splits to be processed.
data_root (str): Path to the data root.
gatherer (Dict): Config dict for gathering the dataset files.
parser (Dict): Config dict for parsing the dataset files.
dumper (Dict): Config dict for dumping the dataset files.
dataset_name (str): Name of the dataset.
nproc (int): Number of processes to process the data.
delete (Optional[List]): A list of files to be deleted after
conversion. Defaults to ['annotations'].
"""
def __init__(self,
splits: List,
data_root: str,
gatherer: Dict,
parser: Dict,
dumper: Dict,
dataset_name: str,
nproc: int,
delete: List = ['annotations']) -> None:
super().__init__(
splits=splits,
data_root=data_root,
gatherer=gatherer,
parser=parser,
dumper=dumper,
dataset_name=dataset_name,
nproc=nproc,
delete=delete,
task='textspotting')
# Textspotting task shares the same images with textdet task
self.img_dir = 'textdet_imgs'
def pack_instance(self,
sample: Tuple,
split: str,
bbox_label: int = 0) -> Dict:
"""Pack the parsed annotation info to an MMOCR format instance.
Args:
sample (Tuple): A tuple of (img_path, instances).
- img_path (str): Path to image file.
- instances (Sequence[Dict]): A list of converted annos. Each
element should be a dict with the following keys:
- 'poly' or 'box'
- 'text'
- 'ignore'
- 'bbox_label' (optional)
split (str): The split of the instance.
Returns:
Dict: An MMOCR format instance.
"""
img_path, instances = sample
img = mmcv.imread(img_path)
h, w = img.shape[:2]
packed_instances = list()
for instance in instances:
assert 'text' in instance, 'Text is not found in the instance.'
poly = instance.get('poly', None)
box = instance.get('box', None)
assert box or poly
packed_sample = dict(
polygon=poly if poly else list(
bbox2poly(box).astype('float64')),
bbox=box if box else list(poly2bbox(poly).astype('float64')),
bbox_label=bbox_label,
ignore=instance['ignore'],
text=instance['text'])
packed_instances.append(packed_sample)
packed_instances = dict(
instances=packed_instances,
img_path=img_path.replace(self.data_root + '/', ''),
height=h,
width=w)
return packed_instances
def add_meta(self, sample: List) -> Dict:
meta = {
'metainfo': {
'dataset_type': 'TextSpottingDataset',
'task_name': 'textspotting',
'category': [{
'id': 0,
'name': 'text'
}]
},
'data_list': sample
}
return meta
@DATA_CONVERTERS.register_module()
class TextRecogDataConverter(BaseDataConverter):
"""Text recognition data converter.
Args:
splits (List): A list of splits to be processed.
data_root (str): Path to the data root.
gatherer (Dict): Config dict for gathering the dataset files.
parser (Dict): Config dict for parsing the dataset annotations.
dumper (Dict): Config dict for dumping the dataset files.
dataset_name (str): Name of the dataset.
nproc (int): Number of processes to process the data.
delete (Optional[List]): A list of files to be deleted after
conversion. Defaults to ['annotations'].
"""
def __init__(self,
splits: List,
data_root: str,
gatherer: Dict,
parser: Dict,
dumper: Dict,
dataset_name: str,
nproc: int,
delete: List = ['annotations']):
super().__init__(
splits=splits,
data_root=data_root,
gatherer=gatherer,
parser=parser,
dumper=dumper,
dataset_name=dataset_name,
nproc=nproc,
task='textrecog',
delete=delete)
def pack_instance(self, sample: Tuple, split: str) -> Dict:
"""Pack the text info to a recognition instance.
Args:
sample (Tuple): A tuple of (img_name, text).
split (str): The split of the instance.
Returns:
Dict: The packed instance.
"""
img_name, text = sample
packed_instance = dict(
instances=[dict(text=text)],
img_path=osp.join(self.img_dir, split, osp.basename(img_name)))
return packed_instance
def add_meta(self, sample: List) -> Dict:
meta = {
'metainfo': {
'dataset_type': 'TextRecogDataset',
'task_name': 'textrecog'
},
'data_list': sample
}
return meta
@DATA_CONVERTERS.register_module()
class TextRecogCropConverter(TextRecogDataConverter):
"""Text recognition crop converter. This converter will crop the text from
the original image. The parser used for this Converter should be a TextDet
parser.
Args:
splits (List): A list of splits to be processed.
data_root (str): Path to the data root.
gatherer (Dict): Config dict for gathering the dataset files.
parser (Dict): Config dict for parsing the dataset annotations.
dumper (Dict): Config dict for dumping the dataset files.
dataset_name (str): Name of the dataset.
nproc (int): Number of processes to process the data.
crop_with_warp (bool): Whether to crop the text from the original image
using opencv warpPerspective.
jitter (bool): (Applicable when crop_with_warp=True)
Whether to jitter the box.
jitter_ratio_x (float): (Applicable when crop_with_warp=True)
Horizontal jitter ratio relative to the height.
jitter_ratio_y (float): (Applicable when crop_with_warp=True)
Vertical jitter ratio relative to the height.
long_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
The ratio of padding the long edge of the cropped image.
Defaults to 0.0.
short_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
The ratio of padding the short edge of the cropped image.
Defaults to 0.0.
delete (Optional[List]): A list of files to be deleted after
conversion. Defaults to ['annotations'].
"""
def __init__(self,
splits: List,
data_root: str,
gatherer: Dict,
parser: Dict,
dumper: Dict,
dataset_name: str,
nproc: int,
crop_with_warp: bool = False,
jitter: bool = False,
jitter_ratio_x: float = 0.0,
jitter_ratio_y: float = 0.0,
long_edge_pad_ratio: float = 0.0,
short_edge_pad_ratio: float = 0.0,
delete: List = ['annotations']):
super().__init__(
splits=splits,
data_root=data_root,
gatherer=gatherer,
parser=parser,
dumper=dumper,
dataset_name=dataset_name,
nproc=nproc,
delete=delete)
self.crop_with_warp = crop_with_warp
self.jitter = jitter
self.jrx = jitter_ratio_x
self.jry = jitter_ratio_y
self.lepr = long_edge_pad_ratio
self.sepr = short_edge_pad_ratio
# Crop converter crops the images of textdet to patches
self.img_dir = 'textdet_imgs'
self.cropped_img_dir = 'textrecog_imgs'
self.crop_save_path = osp.join(self.data_root, self.cropped_img_dir)
mkdir_or_exist(self.crop_save_path)
for split in splits:
mkdir_or_exist(osp.join(self.crop_save_path, split))
def pack_instance(self, sample: Tuple, split: str) -> List:
"""Crop patches from image.
Args:
sample (Tuple): A tuple of (img_path, instances).
split (str): The split of the instance.
Returns:
List: The list of cropped patches.
"""
def get_box(instance: Dict) -> List:
if 'box' in instance:
return bbox2poly(instance['box']).tolist()
if 'poly' in instance:
return bbox2poly(poly2bbox(instance['poly'])).tolist()
def get_poly(instance: Dict) -> List:
if 'poly' in instance:
return instance['poly']
if 'box' in instance:
return bbox2poly(instance['box']).tolist()
data_list = []
img_path, instances = sample
img = mmcv.imread(img_path)
for i, instance in enumerate(instances):
if instance['ignore']:
continue
if self.crop_with_warp:
poly = get_poly(instance)
patch = warp_img(img, poly, self.jitter, self.jrx, self.jry)
else:
box = get_box(instance)
patch = crop_img(img, box, self.lepr, self.sepr)
if patch.shape[0] == 0 or patch.shape[1] == 0:
continue
text = instance['text']
patch_name = osp.splitext(
osp.basename(img_path))[0] + f'_{i}' + osp.splitext(
osp.basename(img_path))[1]
dst_path = osp.join(self.crop_save_path, split, patch_name)
mmcv.imwrite(patch, dst_path)
rec_instance = dict(
instances=[dict(text=text)],
img_path=osp.join(self.cropped_img_dir, split, patch_name))
data_list.append(rec_instance)
return data_list
def add_meta(self, sample: List) -> Dict:
# Since the TextRecogCropConverter packs all of the patches in a single
# image into a list, we need to flatten the list.
sample = [item for sublist in sample for item in sublist]
return super().add_meta(sample)
@DATA_CONVERTERS.register_module()
class WildReceiptConverter(BaseDataConverter):
"""MMOCR only supports wildreceipt dataset for KIE task now. This converter
converts the wildreceipt dataset from close set to open set.
Args:
splits (List): A list of splits to be processed.
data_root (str): Path to the data root.
gatherer (Dict): Config dict for gathering the dataset files.
parser (Dict): Config dict for parsing the dataset annotations.
dumper (Dict): Config dict for dumping the dataset files.
nproc (int): Number of processes to process the data.
delete (Optional[List]): A list of files to be deleted after
conversion. Defaults to ['annotations'].
merge_bg_others (bool): If True, give the same label to "background"
class and "others" class. Defaults to True.
ignore_idx (int): Index for ``ignore`` class. Defaults to 0.
others_idx (int): Index for ``others`` class. Defaults to 25.
"""
def __init__(self,
splits: List,
data_root: str,
gatherer: Dict,
parser: Dict,
dumper: Dict,
dataset_name: str,
nproc: int,
delete: Optional[List] = None,
merge_bg_others: bool = False,
ignore_idx: int = 0,
others_idx: int = 25):
self.ignore_idx = ignore_idx
self.others_idx = others_idx
self.merge_bg_others = merge_bg_others
parser.update(dict(ignore=ignore_idx))
super().__init__(
splits=splits,
data_root=data_root,
gatherer=gatherer,
parser=parser,
dumper=dumper,
dataset_name=dataset_name,
nproc=nproc,
task='kie',
delete=delete)
def add_meta(self, samples: List) -> List:
"""No meta info is required for the wildreceipt dataset."""
return samples
def pack_instance(self, sample: str, split: str):
"""Pack line-json str of close set to line-json str of open set.
Args:
sample (str): The string to be deserialized to
the close set dictionary object.
split (str): The split of the instance.
"""
# Two labels at the same index of the following two lists
# make up a key-value pair. For example, in wildreceipt,
# closeset_key_inds[0] maps to "Store_name_key"
# and closeset_value_inds[0] maps to "Store_addr_value".
closeset_key_inds = list(range(2, self.others_idx, 2))
closeset_value_inds = list(range(1, self.others_idx, 2))
openset_node_label_mapping = {
'bg': 0,
'key': 1,
'value': 2,
'others': 3
}
if self.merge_bg_others:
openset_node_label_mapping['others'] = openset_node_label_mapping[
'bg']
closeset_obj = json.loads(sample)
openset_obj = {
'file_name': closeset_obj['file_name'],
'height': closeset_obj['height'],
'width': closeset_obj['width'],
'annotations': []
}
edge_idx = 1
label_to_edge = {}
for anno in closeset_obj['annotations']:
label = anno['label']
if label == self.ignore_idx:
anno['label'] = openset_node_label_mapping['bg']
anno['edge'] = edge_idx
edge_idx += 1
elif label == self.others_idx:
anno['label'] = openset_node_label_mapping['others']
anno['edge'] = edge_idx
edge_idx += 1
else:
edge = label_to_edge.get(label, None)
if edge is not None:
anno['edge'] = edge
if label in closeset_key_inds:
anno['label'] = openset_node_label_mapping['key']
elif label in closeset_value_inds:
anno['label'] = openset_node_label_mapping['value']
else:
tmp_key = 'key'
if label in closeset_key_inds:
label_with_same_edge = closeset_value_inds[
closeset_key_inds.index(label)]
elif label in closeset_value_inds:
label_with_same_edge = closeset_key_inds[
closeset_value_inds.index(label)]
tmp_key = 'value'
edge_counterpart = label_to_edge.get(
label_with_same_edge, None)
if edge_counterpart is not None:
anno['edge'] = edge_counterpart
else:
anno['edge'] = edge_idx
edge_idx += 1
anno['label'] = openset_node_label_mapping[tmp_key]
label_to_edge[label] = anno['edge']
openset_obj['annotations'] = closeset_obj['annotations']
return json.dumps(openset_obj, ensure_ascii=False)

View File

@ -1,32 +1,39 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os
import os.path as osp
import time
import shutil
from typing import List, Optional, Union
from mmengine import Registry
from mmengine.config import Config
from mmocr.utils.typing_utils import ConfigType, OptConfigType
DATA_PREPARERS = Registry('data preparer')
DATA_OBTAINERS = Registry('data_obtainer')
DATA_CONVERTERS = Registry('data_converter')
DATA_GATHERERS = Registry('data_gatherer')
DATA_PARSERS = Registry('data_parser')
DATA_DUMPERS = Registry('data_dumper')
DATA_PACKERS = Registry('data_packer')
CFG_GENERATORS = Registry('cfg_generator')
@DATA_PREPARERS.register_module()
class DatasetPreparer:
"""Base class of dataset preparer.
Dataset preparer is used to prepare datasets for MMOCR. It mainly consists
of three steps:
1. Obtain the dataset
1. For each split:
- Obtain the dataset
- Download
- Extract
- Move/Rename
2. Process the dataset
- Parse original annotations
- Convert to mmocr format
- Dump the annotation file
- Clean useless files
- Gather the dataset
- Parse the dataset
- Pack the dataset to MMOCR format
- Dump the dataset
2. Delete useless files
3. Generate the base config for this dataset
After all these steps, the original datasets have been prepared for
@ -34,106 +41,169 @@ class DatasetPreparer:
https://mmocr.readthedocs.io/en/dev-1.x/user_guides/dataset_prepare.html
Args:
cfg_path (str): Path to dataset config file.
data_root (str): Root directory of data.
dataset_name (str): Dataset name.
task (str): Task type. Options are 'textdet', 'textrecog',
'textspotting', and 'kie'. Defaults to 'textdet'.
nproc (int): Number of parallel processes. Defaults to 4.
overwrite_cfg (bool): Whether to overwrite the dataset config file if
it already exists. If False, Dataset Preparer will not generate new
config for datasets whose configs are already in base.
train_preparer (OptConfigType): cfg for train data preparation. It contains
the following keys:
- obtainer: cfg for data obtainer.
- gatherer: cfg for data gatherer.
- parser: cfg for data parser.
- packer: cfg for data packer.
- dumper: cfg for data dumper.
Defaults to None.
test_preparer (OptConfigType): cfg for test data preparation.
Defaults to None.
val_preparer (OptConfigType): cfg for val data preparation.
Defaults to None.
config_generator (OptConfigType): cfg for config generator. Defaults to
None.
delete (list[str], optional): List of files to be deleted.
Defaults to None.
"""
def __init__(self,
cfg_path: str,
dataset_name: str,
data_root: str,
dataset_name: str = '',
task: str = 'textdet',
nproc: int = 4,
overwrite_cfg: bool = False) -> None:
cfg_path = osp.join(cfg_path, dataset_name)
train_preparer: OptConfigType = None,
test_preparer: OptConfigType = None,
val_preparer: OptConfigType = None,
config_generator: OptConfigType = None,
delete: Optional[List[str]] = None) -> None:
self.data_root = data_root
self.nproc = nproc
self.task = task
self.dataset_name = dataset_name
self.overwrite_cfg = overwrite_cfg
self.parse_meta(cfg_path)
self.parse_cfg(cfg_path)
self.train_preparer = train_preparer
self.test_preparer = test_preparer
self.val_preparer = val_preparer
self.config_generator = config_generator
self.delete = delete
def __call__(self):
def run(self, splits: Union[str, List] = ['train', 'test', 'val']) -> None:
"""Prepare the dataset."""
if self.with_obtainer:
print('Obtaining Dataset...')
self.data_obtainer()
if self.with_converter:
print('Converting Dataset...')
self.data_converter()
if self.with_config_generator:
print('Generating base configs...')
self.config_generator()
if isinstance(splits, str):
splits = [splits]
assert set(splits).issubset(set(['train', 'test',
'val'])), 'Invalid split name'
for split in splits:
self.loop(split, getattr(self, f'{split}_preparer'))
self.clean()
self.generate_config()
def parse_meta(self, cfg_path: str) -> None:
"""Parse meta file.
@classmethod
def from_file(cls, cfg: ConfigType) -> 'DatasetPreparer':
"""Create a DataPreparer from config file.
Args:
cfg_path (str): Path to meta file.
cfg (ConfigType): A config used for building the data preparer.
Keys of ``cfg`` are described in :meth:`__init__`.
Returns:
DatasetPreparer: A DatasetPreparer built from ``cfg``.
"""
try:
meta = Config.fromfile(osp.join(cfg_path, 'metafile.yml'))
except FileNotFoundError:
cfg = copy.deepcopy(cfg)
data_preparer = cls(
data_root=cfg['data_root'],
dataset_name=cfg.get('dataset_name', ''),
task=cfg.get('task', 'textdet'),
nproc=cfg.get('nproc', 4),
train_preparer=cfg.get('train_preparer', None),
test_preparer=cfg.get('test_preparer', None),
val_preparer=cfg.get('val_preparer', None),
delete=cfg.get('delete', None),
config_generator=cfg.get('config_generator', None))
return data_preparer
def loop(self, split: str, cfg: ConfigType) -> None:
"""Loop over the dataset.
Args:
split (str): The split of the dataset.
cfg (ConfigType): A config used for building obtainer, gatherer,
parser, packer and dumper.
"""
if cfg is None:
return
assert self.task in meta['Data']['Tasks'], \
f'Task {self.task} not supported!'
# License related
if meta['Data']['License']['Type']:
print(f"\033[1;33;40mDataset Name: {meta['Name']}")
print(f"License Type: {meta['Data']['License']['Type']}")
print(f"License Link: {meta['Data']['License']['Link']}")
print(f"BibTeX: {meta['Paper']['BibTeX']}\033[0m")
print(
'\033[1;31;43mMMOCR does not own the dataset. Using this '
'dataset you must accept the license provided by the owners, '
'and cite the corresponding papers appropriately.')
print('If you do not agree with the above license, please cancel '
'the progress immediately by pressing ctrl+c. Otherwise, '
'you are deemed to accept the terms and conditions.\033[0m')
for i in range(5):
print(f'{5-i}...')
time.sleep(1)
def parse_cfg(self, cfg_path: str) -> None:
"""Parse dataset config file.
# build obtainer and run
obtainer = cfg.get('obtainer', None)
if obtainer:
print(f'Obtaining {split} Dataset...')
obtainer.setdefault('task', default=self.task)
obtainer.setdefault('data_root', default=self.data_root)
obtainer = DATA_OBTAINERS.build(obtainer)
obtainer()
Args:
cfg_path (str): Path to dataset config file.
"""
cfg_path = osp.join(cfg_path, self.task + '.py')
assert osp.exists(cfg_path), f'Config file {cfg_path} not found!'
cfg = Config.fromfile(cfg_path)
# build gatherer
gatherer = cfg.get('gatherer', None)
parser = cfg.get('parser', None)
packer = cfg.get('packer', None)
dumper = cfg.get('dumper', None)
related = [gatherer, parser, packer, dumper]
if all(item is None for item in related): # no data process
return
if not all(item is not None for item in related):
raise ValueError('gatherer, parser, packer and dumper should be '
'either all None or not None')
if 'data_obtainer' in cfg:
cfg.data_obtainer.update(task=self.task)
self.data_obtainer = DATA_OBTAINERS.build(cfg.data_obtainer)
if 'data_converter' in cfg:
cfg.data_converter.update(
dict(nproc=self.nproc, dataset_name=self.dataset_name))
self.data_converter = DATA_CONVERTERS.build(cfg.data_converter)
if 'config_generator' in cfg:
cfg.config_generator.update(
dict(
dataset_name=self.dataset_name,
overwrite_cfg=self.overwrite_cfg))
self.config_generator = CFG_GENERATORS.build(cfg.config_generator)
print(f'Gathering {split} Dataset...')
gatherer.setdefault('split', default=split)
gatherer.setdefault('data_root', default=self.data_root)
gatherer.setdefault('ann_dir', default='annotations')
gatherer.setdefault(
'img_dir', default=osp.join(f'{self.task}_imgs', split))
@property
def with_obtainer(self) -> bool:
"""bool: whether the data preparer has an obtainer"""
return getattr(self, 'data_obtainer', None) is not None
gatherer = DATA_GATHERERS.build(gatherer)
img_paths, ann_paths = gatherer()
@property
def with_converter(self) -> bool:
"""bool: whether the data preparer has an converter"""
return getattr(self, 'data_converter', None) is not None
# build parser
print(f'Parsing {split} Images and Annotations...')
parser.setdefault('split', default=split)
parser.setdefault('nproc', default=self.nproc)
parser = DATA_PARSERS.build(parser)
# Convert dataset annotations to MMOCR format
samples = parser(img_paths, ann_paths)
@property
def with_config_generator(self) -> bool:
"""bool: whether the data preparer has a config generator"""
return getattr(self, 'config_generator', None) is not None
# build packer
print(f'Packing {split} Annotations...')
packer.setdefault('split', default=split)
packer.setdefault('nproc', default=self.nproc)
packer.setdefault('data_root', default=self.data_root)
packer = DATA_PACKERS.build(packer)
samples = packer(samples)
# build dumper
print(f'Dumping {split} Annotations...')
# Dump annotation files
dumper.setdefault('task', default=self.task)
dumper.setdefault('split', default=split)
dumper.setdefault('data_root', default=self.data_root)
dumper = DATA_DUMPERS.build(dumper)
dumper(samples)
def generate_config(self):
if self.config_generator is None:
return
self.config_generator.setdefault(
'dataset_name', default=self.dataset_name)
self.config_generator.setdefault('data_root', default=self.data_root)
config_generator = CFG_GENERATORS.build(self.config_generator)
print('Generating base configs...')
config_generator()
def clean(self) -> None:
if self.delete is None:
return
for d in self.delete:
delete_file = osp.join(self.data_root, d)
if osp.exists(delete_file):
if osp.isdir(delete_file):
shutil.rmtree(delete_file)
else:
os.remove(delete_file)
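End to end, the refactored preparer can be driven from a dataset_zoo config roughly as in the sketch below; the config path and dataset name are placeholders, not values from this diff.

from mmengine.config import Config

from mmocr.datasets.preparers.data_preparer import DatasetPreparer

cfg = Config.fromfile('dataset_zoo/cocotextv2/textdet.py')  # placeholder path
cfg.dataset_name = 'cocotextv2'
cfg.task = 'textdet'
preparer = DatasetPreparer.from_file(cfg)
# For each requested split: obtain -> gather -> parse -> pack -> dump,
# then delete temporary files and generate the base config.
preparer.run(splits=['train', 'val'])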

View File

@ -1,4 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .dumpers import JsonDumper, WildreceiptOpensetDumper
from .base import BaseDumper
from .json_dumper import JsonDumper
from .wild_receipt_openset_dumper import WildreceiptOpensetDumper
__all__ = ['JsonDumper', 'WildreceiptOpensetDumper']
__all__ = ['BaseDumper', 'JsonDumper', 'WildreceiptOpensetDumper']

View File

@ -0,0 +1,35 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any
class BaseDumper:
"""Base class for data dumpers.
Args:
task (str): Task type. Options are 'textdet', 'textrecog',
'textspotting', and 'kie'. It is usually set automatically and users
do not need to set it manually in the config file in most cases.
split (str): The partition of the dataset. Options are 'train',
'val' or 'test'. It is usually set automatically and users do not
need to set it manually in the config file in most cases. Defaults
to None.
data_root (str): The root directory of the image and
annotation. It is usually set automatically and users do not need
to set it manually in config file in most cases. Defaults to None.
"""
def __init__(self, task: str, split: str, data_root: str) -> None:
self.task = task
self.split = split
self.data_root = data_root
def __call__(self, data: Any) -> None:
"""Call function.
Args:
data (Any): Data to be dumped.
"""
self.dump(data)
def dump(self, data: Any) -> None:
raise NotImplementedError

View File

@ -1,49 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, List
import mmengine
from mmocr.utils import list_to_file
from ..data_preparer import DATA_DUMPERS
@DATA_DUMPERS.register_module()
class JsonDumper:
def __init__(self, task: str) -> None:
self.task = task
def dump(self, data: Dict, data_root: str, split: str) -> None:
"""Dump data to json file.
Args:
data (Dict): Data to be dumped.
data_root (str): Root directory of data.
split (str): Split of data.
cfg_path (str): Path to configs. Defaults to 'configs/'.
"""
filename = f'{self.task}_{split}.json'
dst_file = osp.join(data_root, filename)
mmengine.dump(data, dst_file)
@DATA_DUMPERS.register_module()
class WildreceiptOpensetDumper:
def __init__(self, task: str) -> None:
self.task = task
def dump(self, data: List, data_root: str, split: str):
"""Dump data to txt file.
Args:
data (List): Data to be dumped.
data_root (str): Root directory of data.
split (str): Split of data.
"""
filename = f'openset_{split}.txt'
dst_file = osp.join(data_root, filename)
list_to_file(dst_file, data)

View File

@ -0,0 +1,24 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict
import mmengine
from ..data_preparer import DATA_DUMPERS
from .base import BaseDumper
@DATA_DUMPERS.register_module()
class JsonDumper(BaseDumper):
"""Dumper for json file."""
def dump(self, data: Dict) -> None:
"""Dump data to json file.
Args:
data (Dict): Data to be dumped.
"""
filename = f'{self.task}_{self.split}.json'
dst_file = osp.join(self.data_root, filename)
mmengine.dump(data, dst_file)
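A quick sketch of the dumper in isolation follows; the data is a toy placeholder, the import path is assumed, and the constructor arguments are normally filled in by the preparer via ``setdefault``.

from mmocr.datasets.preparers.dumpers import JsonDumper  # assumed path

dumper = JsonDumper(task='textdet', split='train', data_root='data/toy')
# BaseDumper.__call__ delegates to dump(), which writes
# data/toy/textdet_train.json via mmengine.dump (assumes data/toy exists).
dumper(dict(metainfo={}, data_list=[]))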

View File

@ -0,0 +1,22 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List
from mmocr.utils import list_to_file
from ..data_preparer import DATA_DUMPERS
from .base import BaseDumper
@DATA_DUMPERS.register_module()
class WildreceiptOpensetDumper(BaseDumper):
def dump(self, data: List):
"""Dump data to txt file.
Args:
data (List): Data to be dumped.
"""
filename = f'openset_{self.split}.txt'
dst_file = osp.join(self.data_root, filename)
list_to_file(dst_file, data)

View File

@ -0,0 +1,8 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseGatherer
from .mono_gatherer import MonoGatherer
from .naf_gatherer import NAFGatherer
from .pair_gatherer import PairGatherer
__all__ = ['BaseGatherer', 'MonoGatherer', 'PairGatherer', 'NAFGatherer']

View File

@ -0,0 +1,49 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List, Optional, Tuple, Union
class BaseGatherer:
"""Base class for gatherer.
Note: The gatherer assumes that all the annotation files are in the same
directory and all the image files are in the same directory.
Args:
img_dir (str): The directory of the images. It is usually set
automatically to f'{task}_imgs/{split}' and users do not need to
set it manually in the config file in most cases. When the image
files are not in the f'{task}_imgs/{split}' directory, users should
set it. Defaults to ''.
ann_dir (str): The directory of the annotation files. It is usually
set automatically to 'annotations' and users do not need to set it
manually in the config file in most cases. When the annotation files
are not in the 'annotations' directory, users should set it.
Defaults to 'annotations'.
split (str, optional): The split to gather, i.e. the partition of
the dataset. Options are 'train', 'val' or 'test'. It is usually
set automatically and users do not need to set it manually in the
config file in most cases. Defaults to None.
data_root (str, optional): The root directory of the image and
annotation. It is usually set automatically and users do not need
to set it manually in config file in most cases. Defaults to None.
"""
def __init__(self,
img_dir: str = '',
ann_dir: str = 'annotations',
split: Optional[str] = None,
data_root: Optional[str] = None) -> None:
self.split = split
self.data_root = data_root
self.ann_dir = osp.join(data_root, ann_dir)
self.img_dir = osp.join(data_root, img_dir)
def __call__(self) -> Union[Tuple[List[str], List[str]], Tuple[str, str]]:
"""The return value of the gatherer is a tuple of two lists or strings.
The first element is the list of image paths or the directory of the
images. The second element is the list of annotation paths or the path
of the annotation file which contains all the annotations.
"""
raise NotImplementedError

View File

@ -0,0 +1,34 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Tuple
from ..data_preparer import DATA_GATHERERS
from .base import BaseGatherer
@DATA_GATHERERS.register_module()
class MonoGatherer(BaseGatherer):
"""Gather the dataset file. Specifically for the case that only one
annotation file is needed. For example,
img_001.jpg \
img_002.jpg ---> train.json
img_003.jpg /
Args:
ann_name (str): The name of the annotation file.
"""
def __init__(self, ann_name: str, **kwargs) -> None:
super().__init__(**kwargs)
self.ann_name = ann_name
def __call__(self) -> Tuple[str, str]:
"""
Returns:
tuple(str, str): The directory of the image and the path of
annotation file.
"""
return (self.img_dir, osp.join(self.ann_dir, self.ann_name))
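A quick sketch of what this gatherer returns (directory and file names invented):

from mmocr.datasets.preparers.gatherers import MonoGatherer

gatherer = MonoGatherer(
    data_root='data/demo',
    img_dir='textdet_imgs/imgs',
    ann_dir='annotations',
    ann_name='train.json',
    split='train')
img_dir, ann_path = gatherer()
# img_dir  == 'data/demo/textdet_imgs/imgs'
# ann_path == 'data/demo/annotations/train.json'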

View File

@ -0,0 +1,66 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os
import os.path as osp
import shutil
import warnings
from typing import List, Tuple
from ..data_preparer import DATA_GATHERERS
from .base import BaseGatherer
@DATA_GATHERERS.register_module()
class NAFGatherer(BaseGatherer):
"""Gather the dataset file from NAF dataset. Specifically for the case that
there is a split file that contains the names of different splits. For
example,
img_001.jpg train: img_001.jpg
img_002.jpg ---> split_file ---> test: img_002.jpg
img_003.jpg val: img_003.jpg
Args:
split_file (str, optional): The name of the split file. Defaults to
"data_split.json".
temp_dir (str, optional): The directory of the temporary images.
Defaults to "temp_images".
"""
def __init__(self,
split_file='data_split.json',
temp_dir: str = 'temp_images',
**kwargs) -> None:
super().__init__(**kwargs)
self.temp_dir = temp_dir
self.split_file = split_file
def __call__(self) -> Tuple[List[str], List[str]]:
"""
Returns:
tuple(list[str], list[str]): The list of image paths and the list
of annotation paths.
"""
split_file = osp.join(self.data_root, self.split_file)
with open(split_file, 'r') as f:
split_data = json.load(f)
img_list = list()
ann_list = list()
# Rename the key
split_data['val'] = split_data.pop('valid')
if not osp.exists(self.img_dir):
os.makedirs(self.img_dir)
current_split_data = split_data[self.split]
for groups in current_split_data:
for img_name in current_split_data[groups]:
src_img = osp.join(self.data_root, self.temp_dir, img_name)
dst_img = osp.join(self.img_dir, img_name)
if not osp.exists(src_img):
warnings.warn(f'{src_img} does not exist!')
continue
# move the image to the new path
shutil.move(src_img, dst_img)
ann = osp.join(self.ann_dir, img_name.replace('.jpg', '.json'))
img_list.append(dst_img)
ann_list.append(ann)
return img_list, ann_list
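For reference, a sketch of the split file shape this gatherer expects (file and group names invented); note the 'valid' key, which the gatherer renames to 'val' before the lookup:

# Assumed shape of data_split.json:
split_data = {
    'train': {'group_a': ['img_001.jpg']},
    'valid': {'group_a': ['img_003.jpg']},  # renamed to 'val' at runtime
    'test': {'group_a': ['img_002.jpg']},
}
split_data['val'] = split_data.pop('valid')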

View File

@ -0,0 +1,59 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import re
from typing import List, Optional, Tuple
from mmocr.utils import list_files
from ..data_preparer import DATA_GATHERERS
from .base import BaseGatherer
@DATA_GATHERERS.register_module()
class PairGatherer(BaseGatherer):
"""Gather the dataset files. Specifically for the paired annotations. That
is to say, each image has a corresponding annotation file. For example,
img_1.jpg <---> gt_img_1.txt
img_2.jpg <---> gt_img_2.txt
img_3.jpg <---> gt_img_3.txt
Args:
img_suffixes (List[str]): File suffixes used for searching.
rule (Sequence): The rule for pairing the files. The first element is
the matching pattern for the file, and the second element is the
replacement pattern, which should be a regular expression. For
example, to map the image name img_1.jpg to the annotation name
gt_img_1.txt, the rule is
[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt'] # noqa: W605 E501
Note: PairGatherer assumes that the annotation files of each split are in
the corresponding split directory. For example, all the train annotation
files are in {ann_dir}/train.
"""
def __init__(self,
img_suffixes: Optional[List[str]] = None,
rule: Optional[List[str]] = None,
**kwargs) -> None:
super().__init__(**kwargs)
self.rule = rule
self.img_suffixes = img_suffixes
# ann_dir = {ann_root}/{ann_dir}/{split}
self.ann_dir = osp.join(self.ann_dir, self.split)
def __call__(self) -> Tuple[List[str], List[str]]:
"""tuple(list, list): The list of image paths and the list of
annotation paths."""
img_list = list()
ann_list = list()
for img_path in list_files(self.img_dir, self.img_suffixes):
if not re.match(self.rule[0], osp.basename(img_path)):
continue
ann_name = re.sub(self.rule[0], self.rule[1],
osp.basename(img_path))
ann_path = osp.join(self.ann_dir, ann_name)
img_list.append(img_path)
ann_list.append(ann_path)
return img_list, ann_list
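The rule is a (pattern, replacement) regex pair; a quick check of the docstring's example rule:

import re

rule = [r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt']
assert re.sub(rule[0], rule[1], 'img_3.JPG') == 'gt_img_3.txt'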

View File

@ -0,0 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .naive_data_obtainer import NaiveDataObtainer
__all__ = ['NaiveDataObtainer']

View File

@ -10,7 +10,7 @@ from typing import Dict, List, Optional, Tuple
from mmengine import mkdir_or_exist
from mmocr.utils import check_integrity, is_archive
from .data_preparer import DATA_OBTAINERS
from ..data_preparer import DATA_OBTAINERS
ssl._create_default_https_context = ssl._create_unverified_context
@ -24,8 +24,12 @@ class NaiveDataObtainer:
Args:
files (list[dict]): A list of file information.
cache_path (str): The path to cache the downloaded files.
data_root (str): The root path of the dataset.
task (str): The task of the dataset.
data_root (str): The root path of the dataset. It is usually set
automatically and users do not need to set it manually in config file
in most cases.
task (str): The task of the dataset. It is usually set automatically
and users do not need to set it manually in config file
in most cases.
"""
def __init__(self, files: List[Dict], cache_path: str, data_root: str,
@ -114,6 +118,23 @@ class NaiveDataObtainer:
dst_path = osp.join(osp.dirname(src_path), zip_name)
else:
dst_path = osp.join(dst_path, zip_name)
extracted = False
if osp.exists(dst_path):
name = set(os.listdir(dst_path))
if '.finish' in name:
extracted = True
elif '.finish' not in name and len(name) > 0:
while True:
c = input(f'{dst_path} already exists when extracting '
f'{zip_name}, unzip it again? (y/n) ')
if c.lower() in ['y', 'n']:
extracted = c == 'n'
break
if extracted:
open(osp.join(dst_path, '.finish'), 'w').close()
print(f'{zip_name} has been extracted. Skip')
return
mkdir_or_exist(dst_path)
print(f'Extracting: {osp.basename(src_path)}')
if src_path.endswith('.zip'):
@ -136,6 +157,8 @@ class NaiveDataObtainer:
'Please install tarfile by running "pip install tarfile".')
with tarfile.open(src_path, mode) as tar_ref:
tar_ref.extractall(dst_path)
open(osp.join(dst_path, '.finish'), 'w').close()
if delete:
os.remove(src_path)
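The resume logic added above boils down to a sentinel check, roughly:

import os.path as osp

def already_extracted(dst_path: str) -> bool:
    """An archive counts as fully extracted once '.finish' exists."""
    return osp.exists(osp.join(dst_path, '.finish'))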

View File

@ -0,0 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BasePacker
from .textdet_packer import TextDetPacker
from .textrecog_packer import TextRecogCropPacker, TextRecogPacker
from .textspotting_packer import TextSpottingPacker
from .wildreceipt_packer import WildReceiptPacker
__all__ = [
'BasePacker', 'TextDetPacker', 'TextRecogPacker', 'TextRecogCropPacker',
'TextSpottingPacker', 'WildReceiptPacker'
]

View File

@ -0,0 +1,57 @@
# Copyright (c) OpenMMLab. All rights reserved.
from abc import abstractmethod
from typing import Dict, List, Tuple
from mmengine import track_parallel_progress
class BasePacker:
"""Base class for packing the parsed annotation info to MMOCR format.
Args:
data_root (str): The root path of the dataset. It is usually set
automatically and users do not need to set it manually in config file
in most cases.
split (str): The split of the dataset. It is usually set automatically
and users do not need to set it manually in config file in most
cases.
nproc (int): Number of processes to process the data. Defaults to 1.
It is usually set automatically and users do not need to set it
manually in config file in most cases.
"""
def __init__(self, data_root: str, split: str, nproc: int = 1) -> None:
self.data_root = data_root
self.split = split
self.nproc = nproc
@abstractmethod
def pack_instance(self, sample: Tuple, split: str) -> Dict:
"""Pack the parsed annotation info to an MMOCR format instance.
Args:
sample (Tuple): A tuple of (img_path, instances).
- img_path (str): Path to image file.
- instances (Sequence[Dict]): A list of converted annos.
split (str): The split of the instance.
Returns:
Dict: An MMOCR format instance.
"""
@abstractmethod
def add_meta(self, sample: List) -> Dict:
"""Add meta information to the sample.
Args:
sample (List): A list of samples of the dataset.
Returns:
Dict: A dict contains the meta information and samples.
"""
def __call__(self, samples) -> Dict:
samples = track_parallel_progress(
self.pack_instance, samples, nproc=self.nproc)
samples = self.add_meta(samples)
return samples

View File

@ -0,0 +1,110 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, List, Tuple
import mmcv
from mmocr.utils import bbox2poly, poly2bbox
from ..data_preparer import DATA_PACKERS
from .base import BasePacker
@DATA_PACKERS.register_module()
class TextDetPacker(BasePacker):
"""Text detection packer. It is used to pack the parsed annotation info to.
.. code-block:: python
{
"metainfo":
{
"dataset_type": "TextDetDataset",
"task_name": "textdet",
"category": [{"id": 0, "name": "text"}]
},
"data_list":
[
{
"img_path": "test_img.jpg",
"height": 640,
"width": 640,
"instances":
[
{
"polygon": [0, 0, 0, 10, 10, 20, 20, 0],
"bbox": [0, 0, 10, 20],
"bbox_label": 0,
"ignore": False
},
// ...
]
}
]
}
"""
def pack_instance(self, sample: Tuple, bbox_label: int = 0) -> Dict:
"""Pack the parsed annotation info to an MMOCR format instance.
Args:
sample (Tuple): A tuple of (img_path, instances).
- img_path (str): Path to the image file.
- instances (Sequence[Dict]): A list of converted annos. Each
element should be a dict with the following keys:
- 'poly' or 'box'
- 'ignore'
- 'bbox_label' (optional)
bbox_label (int): The label index of the text. Defaults to 0.
Returns:
Dict: An MMOCR format instance.
"""
img_path, instances = sample
img = mmcv.imread(img_path)
h, w = img.shape[:2]
packed_instances = list()
for instance in instances:
poly = instance.get('poly', None)
box = instance.get('box', None)
assert box or poly
packed_sample = dict(
polygon=poly if poly else list(
bbox2poly(box).astype('float64')),
bbox=box if box else list(poly2bbox(poly).astype('float64')),
bbox_label=bbox_label,
ignore=instance['ignore'])
packed_instances.append(packed_sample)
packed_instances = dict(
instances=packed_instances,
img_path=osp.relpath(img_path, self.data_root),
height=h,
width=w)
return packed_instances
def add_meta(self, sample: List) -> Dict:
"""Add meta information to the sample.
Args:
sample (List): A list of samples of the dataset.
Returns:
Dict: A dict contains the meta information and samples.
"""
meta = {
'metainfo': {
'dataset_type': 'TextDetDataset',
'task_name': 'textdet',
'category': [{
'id': 0,
'name': 'text'
}]
},
'data_list': sample
}
return meta
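A hypothetical round trip (paths invented; the image must exist, since pack_instance reads it to fill in height and width):

from mmocr.datasets.preparers import TextDetPacker

packer = TextDetPacker(data_root='data/demo', split='train')
sample = ('data/demo/img_1.jpg',
          [dict(box=[0, 0, 10, 20], ignore=False)])
packed = packer.add_meta([packer.pack_instance(sample)])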

View File

@ -0,0 +1,178 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, List, Tuple
import mmcv
from mmengine import mkdir_or_exist
from mmocr.utils import bbox2poly, crop_img, poly2bbox, warp_img
from ..data_preparer import DATA_PACKERS
from .base import BasePacker
@DATA_PACKERS.register_module()
class TextRecogPacker(BasePacker):
"""Text recogntion packer. It is used to pack the parsed annotation info
to:
.. code-block:: python
{
"metainfo":
{
"dataset_type": "TextRecogDataset",
"task_name": "textrecog",
},
"data_list":
[
{
"img_path": "textrecog_imgs/train/test_img.jpg",
"instances":
[
{
"text": "GRAND"
}
]
}
]
}
"""
def pack_instance(self, sample: Tuple) -> Dict:
"""Pack the text info to a recognition instance.
Args:
sample (Tuple): A tuple of (img_name, text).
Returns:
Dict: The packed instance.
"""
img_name, text = sample
# TODO: remove hard code
packed_instance = dict(
instances=[dict(text=text)],
img_path=osp.join('textrecog_imgs', self.split,
osp.basename(img_name)))
return packed_instance
def add_meta(self, sample: List) -> Dict:
"""Add meta information to the sample.
Args:
sample (List): A list of samples of the dataset.
Returns:
Dict: A dict contains the meta information and samples.
"""
meta = {
'metainfo': {
'dataset_type': 'TextRecogDataset',
'task_name': 'textrecog'
},
'data_list': sample
}
return meta
@DATA_PACKERS.register_module()
class TextRecogCropPacker(TextRecogPacker):
"""Text recognition packer with image cropper. It is used to pack the
parsed annotation info and crop out the word images from the full-size
ones.
Args:
crop_with_warp (bool): Whether to crop the text from the original
image using opencv warpPerspective.
jitter (bool): (Applicable when crop_with_warp=True)
Whether to jitter the box.
jitter_ratio_x (float): (Applicable when crop_with_warp=True)
Horizontal jitter ratio relative to the height.
jitter_ratio_y (float): (Applicable when crop_with_warp=True)
Vertical jitter ratio relative to the height.
long_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
The ratio of padding the long edge of the cropped image.
Defaults to 0.0.
short_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
The ratio of padding the short edge of the cropped image.
Defaults to 0.0.
"""
def __init__(self,
crop_with_warp: bool = False,
jitter: bool = False,
jitter_ratio_x: float = 0.0,
jitter_ratio_y: float = 0.0,
long_edge_pad_ratio: float = 0.0,
short_edge_pad_ratio: float = 0.0,
**kwargs):
super().__init__(**kwargs)
self.crop_with_warp = crop_with_warp
self.jitter = jitter
self.jrx = jitter_ratio_x
self.jry = jitter_ratio_y
self.lepr = long_edge_pad_ratio
self.sepr = short_edge_pad_ratio
# The crop packer crops the full textdet images into text patches
self.cropped_img_dir = 'textrecog_imgs'
self.crop_save_path = osp.join(self.data_root, self.cropped_img_dir)
mkdir_or_exist(self.crop_save_path)
mkdir_or_exist(osp.join(self.crop_save_path, self.split))
def pack_instance(self, sample: Tuple) -> List:
"""Crop patches from image.
Args:
sample (Tuple): A tuple of (img_path, instances).
Returns:
List: The list of cropped patches.
"""
def get_box(instance: Dict) -> List:
if 'box' in instance:
return bbox2poly(instance['box']).tolist()
if 'poly' in instance:
return bbox2poly(poly2bbox(instance['poly'])).tolist()
def get_poly(instance: Dict) -> List:
if 'poly' in instance:
return instance['poly']
if 'box' in instance:
return bbox2poly(instance['box']).tolist()
data_list = []
img_path, instances = sample
img = mmcv.imread(img_path)
for i, instance in enumerate(instances):
if instance['ignore']:
continue
if self.crop_with_warp:
poly = get_poly(instance)
patch = warp_img(img, poly, self.jitter, self.jrx, self.jry)
else:
box = get_box(instance)
patch = crop_img(img, box, self.lepr, self.sepr)
if patch.shape[0] == 0 or patch.shape[1] == 0:
continue
text = instance['text']
patch_name = osp.splitext(
osp.basename(img_path))[0] + f'_{i}' + osp.splitext(
osp.basename(img_path))[1]
dst_path = osp.join(self.crop_save_path, self.split, patch_name)
mmcv.imwrite(patch, dst_path)
rec_instance = dict(
instances=[dict(text=text)],
img_path=osp.join(self.cropped_img_dir, self.split,
patch_name))
data_list.append(rec_instance)
return data_list
def add_meta(self, sample: List) -> Dict:
# Since TextRecogCropPacker packs all of the patches cropped from a
# single image into a list, we need to flatten the list.
sample = [item for sublist in sample for item in sublist]
return super().add_meta(sample)
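A construction sketch (values invented): warp-based cropping with a small box jitter. Patches are written under data/demo/textrecog_imgs/train, which the constructor creates eagerly:

from mmocr.datasets.preparers import TextRecogCropPacker

packer = TextRecogCropPacker(
    data_root='data/demo', split='train',
    crop_with_warp=True, jitter=True,
    jitter_ratio_x=0.02, jitter_ratio_y=0.02)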

View File

@ -0,0 +1,113 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, List, Tuple
import mmcv
from mmocr.utils import bbox2poly, poly2bbox
from ..data_preparer import DATA_PACKERS
from .base import BasePacker
@DATA_PACKERS.register_module()
class TextSpottingPacker(BasePacker):
"""Text spotting packer. It is used to pack the parsed annotation info to:
.. code-block:: python
{
"metainfo":
{
"dataset_type": "TextDetDataset",
"task_name": "textdet",
"category": [{"id": 0, "name": "text"}]
},
"data_list":
[
{
"img_path": "test_img.jpg",
"height": 640,
"width": 640,
"instances":
[
{
"polygon": [0, 0, 0, 10, 10, 20, 20, 0],
"bbox": [0, 0, 10, 20],
"bbox_label": 0,
"ignore": False,
"text": "mmocr"
},
// ...
]
}
]
}
"""
def pack_instance(self, sample: Tuple, bbox_label: int = 0) -> Dict:
"""Pack the parsed annotation info to an MMOCR format instance.
Args:
sample (Tuple): A tuple of (img_path, instances).
- img_path (str): Path to image file.
- instances (Sequence[Dict]): A list of converted annos. Each
element should be a dict with the following keys:
- 'poly' or 'box'
- 'text'
- 'ignore'
- 'bbox_label' (optional)
bbox_label (int): The label index of the text. Defaults to 0.
Returns:
Dict: An MMOCR format instance.
"""
img_path, instances = sample
img = mmcv.imread(img_path)
h, w = img.shape[:2]
packed_instances = list()
for instance in instances:
assert 'text' in instance, 'Text is not found in the instance.'
poly = instance.get('poly', None)
box = instance.get('box', None)
assert box or poly
packed_sample = dict(
polygon=poly if poly else list(
bbox2poly(box).astype('float64')),
bbox=box if box else list(poly2bbox(poly).astype('float64')),
bbox_label=bbox_label,
ignore=instance['ignore'],
text=instance['text'])
packed_instances.append(packed_sample)
packed_instances = dict(
instances=packed_instances,
img_path=osp.relpath(img_path, self.data_root),
height=h,
width=w)
return packed_instances
def add_meta(self, sample: List) -> Dict:
"""Add meta information to the sample.
Args:
sample (List): A list of samples of the dataset.
Returns:
Dict: A dict contains the meta information and samples.
"""
meta = {
'metainfo': {
'dataset_type': 'TextSpottingDataset',
'task_name': 'textspotting',
'category': [{
'id': 0,
'name': 'text'
}]
},
'data_list': sample
}
return meta
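Usage mirrors TextDetPacker, except every instance must also carry a 'text' field (paths invented; the image must exist for the size lookup):

from mmocr.datasets.preparers import TextSpottingPacker

packer = TextSpottingPacker(data_root='data/demo', split='train')
sample = ('data/demo/img_1.jpg',
          [dict(box=[0, 0, 10, 20], ignore=False, text='mmocr')])
packed = packer.pack_instance(sample)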

View File

@ -0,0 +1,112 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
from typing import List
from ..data_preparer import DATA_PACKERS
from .base import BasePacker
@DATA_PACKERS.register_module()
class WildReceiptPacker(BasePacker):
"""Pack the wildreceipt annotation to MMOCR format.
Args:
merge_bg_others (bool): If True, give the same label to "background"
class and "others" class. Defaults to True.
ignore_idx (int): Index for ``ignore`` class. Defaults to 0.
others_idx (int): Index for ``others`` class. Defaults to 25.
"""
def __init__(self,
merge_bg_others: bool = False,
ignore_idx: int = 0,
others_idx: int = 25,
**kwargs) -> None:
super().__init__(**kwargs)
self.ignore_idx = ignore_idx
self.others_idx = others_idx
self.merge_bg_others = merge_bg_others
def add_meta(self, samples: List) -> List:
"""No meta info is required for the wildreceipt dataset."""
return samples
def pack_instance(self, sample: str):
"""Pack line-json str of close set to line-json str of open set.
Args:
sample (str): The string to be deserialized to
the close set dictionary object.
"""
# Two labels at the same index of the following two lists
# make up a key-value pair. For example, in wildreceipt,
# closeset_key_inds[0] maps to "Store_name_key"
# and closeset_value_inds[0] maps to "Store_name_value".
closeset_key_inds = list(range(2, self.others_idx, 2))
closeset_value_inds = list(range(1, self.others_idx, 2))
openset_node_label_mapping = {
'bg': 0,
'key': 1,
'value': 2,
'others': 3
}
if self.merge_bg_others:
openset_node_label_mapping['others'] = openset_node_label_mapping[
'bg']
closeset_obj = json.loads(sample)
openset_obj = {
'file_name':
closeset_obj['file_name'].replace(self.data_root + '/', ''),
'height':
closeset_obj['height'],
'width':
closeset_obj['width'],
'annotations': []
}
edge_idx = 1
label_to_edge = {}
for anno in closeset_obj['annotations']:
label = anno['label']
if label == self.ignore_idx:
anno['label'] = openset_node_label_mapping['bg']
anno['edge'] = edge_idx
edge_idx += 1
elif label == self.others_idx:
anno['label'] = openset_node_label_mapping['others']
anno['edge'] = edge_idx
edge_idx += 1
else:
edge = label_to_edge.get(label, None)
if edge is not None:
anno['edge'] = edge
if label in closeset_key_inds:
anno['label'] = openset_node_label_mapping['key']
elif label in closeset_value_inds:
anno['label'] = openset_node_label_mapping['value']
else:
tmp_key = 'key'
if label in closeset_key_inds:
label_with_same_edge = closeset_value_inds[
closeset_key_inds.index(label)]
elif label in closeset_value_inds:
label_with_same_edge = closeset_key_inds[
closeset_value_inds.index(label)]
tmp_key = 'value'
edge_counterpart = label_to_edge.get(
label_with_same_edge, None)
if edge_counterpart is not None:
anno['edge'] = edge_counterpart
else:
anno['edge'] = edge_idx
edge_idx += 1
anno['label'] = openset_node_label_mapping[tmp_key]
label_to_edge[label] = anno['edge']
openset_obj['annotations'] = closeset_obj['annotations']
return json.dumps(openset_obj, ensure_ascii=False)
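A toy check of the index pairing described in the comments above (with the default others_idx=25): key labels are the even indices 2..24, value labels the odd indices 1..23, and entries at the same list position form a key-value pair.

closeset_key_inds = list(range(2, 25, 2))    # [2, 4, ..., 24]
closeset_value_inds = list(range(1, 25, 2))  # [1, 3, ..., 23]
# Key label 2 pairs with value label 1, key 4 with value 3, and so on.
assert closeset_value_inds[closeset_key_inds.index(2)] == 1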

View File

@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseParser
from .coco_parser import COCOTextDetAnnParser
from .funsd_parser import FUNSDTextDetAnnParser
from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
@ -10,7 +11,7 @@ from .totaltext_parser import TotaltextTextDetAnnParser
from .wildreceipt_parser import WildreceiptKIEAnnParser
__all__ = [
'ICDARTxtTextDetAnnParser', 'ICDARTxtTextRecogAnnParser',
'BaseParser', 'ICDARTxtTextDetAnnParser', 'ICDARTxtTextRecogAnnParser',
'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
'COCOTextDetAnnParser', 'SVTTextDetAnnParser', 'FUNSDTextDetAnnParser',
'SROIETextDetAnnParser', 'NAFAnnParser'

View File

@ -1,67 +1,87 @@
# Copyright (c) OpenMMLab. All rights reserved.
from abc import abstractmethod
from functools import partial
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Tuple, Union
from mmengine import track_parallel_progress
from mmocr.utils import track_parallel_progress_multi_args
class BaseParser:
"""Base class for parsing annotations.
Args:
data_root (str, optional): Path to the data root. Defaults to None.
nproc (int, optional): Number of processes. Defaults to 1.
split (str): The split of the dataset. It is usually set automatically
and users do not need to set it manually in config file in most
cases.
nproc (int): Number of processes to process the data. Defaults to 1.
It is usually set automatically and users do not need to set it
manually in config file in most cases.
"""
def __init__(self,
data_root: Optional[str] = None,
nproc: int = 1) -> None:
self.data_root = data_root
def __init__(self, split: str, nproc: int = 1) -> None:
self.nproc = nproc
self.split = split
def __call__(self, files: List[Tuple], split: str) -> List:
def __call__(self, img_paths: Union[List[str], str],
ann_paths: Union[List[str], str]) -> List[Tuple]:
"""Parse annotations.
Args:
files (List[Tuple]): A list of a tuple of
(image_path, annotation_path).
split (str): The split of the dataset.
img_paths (str or list[str]): the list of image paths or the
directory of the images.
ann_paths (str or list[str]): the list of annotation paths or the
path of the annotation file which contains all the annotations.
Returns:
List[Tuple]: A list of tuples of (img_path, instances).
"""
samples = self.parse_files(files, split)
samples = self.parse_files(img_paths, ann_paths)
return samples
def parse_files(self, files: List[Tuple], split: str) -> List[Tuple]:
def parse_files(self, img_paths: Union[List[str], str],
ann_paths: Union[List[str], str]) -> List[Tuple]:
"""Convert annotations to MMOCR format.
Args:
files (Tuple): A list of tuple of path to image and annotation.
img_paths (str or list[str]): the list of image paths or the
directory of the images.
ann_paths (str or list[str]): the list of annotation paths or the
path of the annotation file which contains all the annotations.
Returns:
List[Tuple]: A list of a tuple of (image_path, instances)
List[Tuple]: A list of tuples of (img_path, instances).
- img_path (str): The path of image file, which can be read
directly by opencv.
- instance: instance is a list of dict containing parsed
annotations, which should contain the following keys:
- 'poly' or 'box' (textdet or textspotting)
- 'text' (textspotting or textrecog)
- 'ignore' (all tasks)
"""
func = partial(self.parse_file, split=split)
samples = track_parallel_progress(func, files, nproc=self.nproc)
samples = track_parallel_progress_multi_args(
self.parse_file, (img_paths, ann_paths), nproc=self.nproc)
return samples
@abstractmethod
def parse_file(self, file: Tuple, split: str) -> Tuple:
def parse_file(self, img_path: str, ann_path: str) -> Tuple:
"""Convert annotation for a single image.
Args:
file (Tuple): A tuple of path to image and annotation
split (str): Current split.
img_path (str): The path of image.
ann_path (str): The path of annotation.
Returns:
Tuple: A tuple of (img_path, instance). Instance is a list of dict
containing parsed annotations, which should contain the
following keys:
- 'poly' or 'box' (textdet or textspotting)
- 'text' (textspotting or textrecog)
- 'ignore' (all task)
Tuple: A tuple of (img_path, instance).
- img_path (str): The path of image file, which can be read
directly by opencv.
- instance: instance is a list of dict containing parsed
annotations, which should contain the following keys:
- 'poly' or 'box' (textdet or textspotting)
- 'text' (textspotting or textrecog)
- 'ignore' (all tasks)
Examples:
An example of returned values:

View File

@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, Tuple
from typing import List
from mmdet.datasets.api_wrappers import COCO
@ -21,25 +21,25 @@ class COCOTextDetAnnParser(BaseParser):
"""
def __init__(self,
data_root: str = None,
split: str,
nproc: int = 1,
variant: str = 'standard') -> None:
super().__init__(nproc=nproc, data_root=data_root)
super().__init__(nproc=nproc, split=split)
assert variant in ['standard', 'cocotext', 'textocr'], \
f'variant {variant} is not supported'
self.variant = variant
def parse_files(self, files: Tuple, split: str = None) -> Dict:
def parse_files(self, img_dir: str, ann_path: str) -> List:
"""Parse single annotation."""
samples = list()
coco = COCO(files)
coco = COCO(ann_path)
if self.variant == 'cocotext' or self.variant == 'textocr':
# cocotext stores both 'train' and 'val' split in one annotation
# file, and uses the 'set' field to distinguish them.
if self.variant == 'cocotext':
for img in coco.dataset['imgs']:
if split == coco.dataset['imgs'][img]['set']:
if self.split == coco.dataset['imgs'][img]['set']:
coco.imgs[img] = coco.dataset['imgs'][img]
# textocr stores the 'train' and 'val' splits separately
elif self.variant == 'textocr':
@ -60,8 +60,6 @@ class COCOTextDetAnnParser(BaseParser):
img_info = coco.load_imgs([img_id])[0]
img_info['img_id'] = img_id
img_path = img_info['file_name']
if self.data_root is not None:
img_path = osp.join(self.data_root, img_path)
ann_ids = coco.get_ann_ids(img_ids=[img_id])
if len(ann_ids) == 0:
continue
@ -96,5 +94,6 @@ class COCOTextDetAnnParser(BaseParser):
instances.append(
dict(
poly=ann['points'], text=text, ignore=text == '.'))
samples.append((img_path, instances))
samples.append((osp.join(img_dir,
osp.basename(img_path)), instances))
return samples
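A hedged usage sketch (paths invented; assuming COCOTextDetAnnParser is exported at the package level like the other parsers in the tests below). For the 'cocotext' variant, the parser filters images by matching self.split against each image's 'set' field, so one annotation file can serve both splits:

from mmocr.datasets.preparers import COCOTextDetAnnParser

parser = COCOTextDetAnnParser(split='train', variant='cocotext')
samples = parser.parse_files('data/demo/textdet_imgs/imgs',
                             'data/demo/annotations/train.json')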

View File

@ -17,17 +17,13 @@ class FUNSDTextDetAnnParser(BaseParser):
to 1.
"""
def __init__(self, nproc: int = 1) -> None:
super().__init__(nproc=nproc)
def parse_file(self, file: Tuple, split: str) -> Tuple:
def parse_file(self, img_path: str, ann_path: str) -> Tuple:
"""Parse single annotation."""
img_file, json_file = file
instances = list()
for poly, text, ignore in self.loader(json_file):
for poly, text, ignore in self.loader(ann_path):
instances.append(dict(poly=poly, text=text, ignore=ignore))
return img_file, instances
return img_path, instances
def loader(self, file_path: str):
with open(file_path, 'r') as f:

View File

@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List, Optional, Tuple
from mmocr.utils import bbox2poly
@ -35,22 +36,21 @@ class ICDARTxtTextDetAnnParser(BaseParser):
ignore: str = '###',
format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
encoding: str = 'utf-8',
nproc: int = 1,
remove_strs: Optional[List[str]] = None,
mode: str = None) -> None:
mode: str = None,
**kwargs) -> None:
self.sep = separator
self.format = format
self.encoding = encoding
self.ignore = ignore
self.mode = mode
self.remove_strs = remove_strs
super().__init__(nproc=nproc)
super().__init__(**kwargs)
def parse_file(self, file: Tuple, split: str) -> Tuple:
def parse_file(self, img_path: str, ann_path: str) -> Tuple:
"""Parse single annotation."""
img_file, txt_file = file
instances = list()
for anno in self.loader(txt_file, self.sep, self.format,
for anno in self.loader(ann_path, self.sep, self.format,
self.encoding):
anno = list(anno.values())
if self.remove_strs is not None:
@ -66,7 +66,7 @@ class ICDARTxtTextDetAnnParser(BaseParser):
instances.append(
dict(poly=poly, text=text, ignore=text == self.ignore))
return img_file, instances
return img_path, instances
@DATA_PARSERS.register_module()
@ -97,21 +97,21 @@ class ICDARTxtTextRecogAnnParser(BaseParser):
ignore: str = '#',
format: str = 'img,text',
encoding: str = 'utf-8',
nproc: int = 1,
remove_strs: Optional[List[str]] = ['"']) -> None:
remove_strs: Optional[List[str]] = ['"'],
**kwargs) -> None:
self.sep = separator
self.format = format
self.encoding = encoding
self.ignore = ignore
self.remove_strs = remove_strs
super().__init__(nproc=nproc)
super().__init__(**kwargs)
def parse_files(self, files: str, split: str) -> List:
def parse_files(self, img_dir: str, ann_path: str) -> List:
"""Parse annotations."""
assert isinstance(files, str)
assert isinstance(ann_path, str)
samples = list()
for anno in self.loader(
file_path=files,
file_path=ann_path,
format=self.format,
encoding=self.encoding,
separator=self.sep):
@ -122,6 +122,6 @@ class ICDARTxtTextRecogAnnParser(BaseParser):
if text == self.ignore:
continue
img_name = anno['img']
samples.append((img_name, text))
samples.append((osp.join(img_dir, osp.basename(img_name)), text))
return samples

View File

@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
from typing import Dict, List, Tuple
from typing import List, Tuple
import numpy as np
@ -28,32 +28,28 @@ class NAFAnnParser(BaseParser):
"" (empty string) is if the field was blank
Args:
data_root (str): Path to the dataset root.
ignore (list(str)): The text of the ignored instances. Default: ['#'].
det (bool): Whether to parse the detection annotation. Default: True.
If False, the parser will consider special case in NAF dataset
where the transcription is not available.
nproc (int): Number of processes to load the data. Default: 1.
"""
def __init__(self,
data_root: str,
ignore: List[str] = ['#'],
det: bool = True,
nproc: int = 1) -> None:
**kwargs) -> None:
self.ignore = ignore
self.det = det
super().__init__(data_root=data_root, nproc=nproc)
super().__init__(**kwargs)
def parse_file(self, file: Tuple, split: str) -> Dict:
def parse_file(self, img_path: str, ann_path: str) -> Tuple:
"""Convert single annotation."""
img_file, json_file = file
instances = list()
for poly, text in self.loader(json_file):
for poly, text in self.loader(ann_path):
instances.append(
dict(poly=poly, text=text, ignore=text in self.ignore))
return img_file, instances
return img_path, instances
def loader(self, file_path: str) -> str:
"""Load the annotation of the NAF dataset.

View File

@ -31,6 +31,7 @@ class SROIETextDetAnnParser(BaseParser):
"""
def __init__(self,
split: str,
separator: str = ',',
ignore: str = '###',
format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
@ -44,16 +45,15 @@ class SROIETextDetAnnParser(BaseParser):
self.ignore = ignore
self.mode = mode
self.remove_strs = remove_strs
super().__init__(nproc=nproc)
super().__init__(nproc=nproc, split=split)
def parse_file(self, file: Tuple, split: str) -> Tuple:
def parse_file(self, img_path: str, ann_path: str) -> Tuple:
"""Parse single annotation."""
img_file, txt_file = file
instances = list()
try:
# there might be some illegal symbols in the annotation
# which cannot be parsed by loader
for anno in self.loader(txt_file, self.sep, self.format,
for anno in self.loader(ann_path, self.sep, self.format,
self.encoding):
anno = list(anno.values())
if self.remove_strs is not None:
@ -71,4 +71,4 @@ class SROIETextDetAnnParser(BaseParser):
except Exception:
pass
return img_file, instances
return img_path, instances

View File

@ -17,15 +17,13 @@ class SVTTextDetAnnParser(BaseParser):
to 1.
"""
def __init__(self, data_root: str = None, nproc: int = 1) -> None:
super().__init__(data_root=data_root, nproc=nproc)
def parse_files(self, files: str, split: str) -> List:
def parse_files(self, img_dir: str, ann_path: str) -> List:
"""Parse annotations."""
assert isinstance(files, str)
assert isinstance(ann_path, str)
samples = list()
for img_name, instance in self.loader(files):
samples.append((img_name, instance))
for img_name, instance in self.loader(ann_path):
samples.append((osp.join(img_dir,
osp.basename(img_name)), instance))
return samples
@ -45,8 +43,7 @@ class SVTTextDetAnnParser(BaseParser):
tree = ET.parse(file_path)
root = tree.getroot()
for image in root.findall('image'):
image_name = osp.join(self.data_root, 'textdet_imgs',
image.find('imageName').text)
image_name = image.find('imageName').text
instances = list()
for rectangle in image.find('taggedRectangles'):
x = int(rectangle.get('x'))

View File

@ -23,22 +23,18 @@ class TotaltextTextDetAnnParser(BaseParser):
nproc (int): Number of processes to load the data. Default: 1.
"""
def __init__(self,
data_root: str,
ignore: str = '#',
nproc: int = 1) -> None:
def __init__(self, ignore: str = '#', **kwargs) -> None:
self.ignore = ignore
super().__init__(data_root=data_root, nproc=nproc)
super().__init__(**kwargs)
def parse_file(self, file: Tuple, split: str) -> Dict:
def parse_file(self, img_path: str, ann_path: str) -> Tuple:
"""Convert single annotation."""
img_file, txt_file = file
instances = list()
for poly, text in self.loader(txt_file):
for poly, text in self.loader(ann_path):
instances.append(
dict(poly=poly, text=text, ignore=text == self.ignore))
return img_file, instances
return img_path, instances
def loader(self, file_path: str) -> str:
"""The annotation of the totaltext dataset may be stored in multiple

View File

@ -1,7 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
from typing import Dict, Tuple
from typing import Dict, List
from mmocr.utils import list_from_file
from ..data_preparer import DATA_PARSERS
@ -30,21 +30,18 @@ class WildreceiptTextDetAnnParser(BaseParser):
to 1.
"""
def __init__(self,
data_root: str,
ignore: int = 0,
nproc: int = 1) -> None:
def __init__(self, ignore: int = 0, **kwargs) -> None:
self.ignore = ignore
super().__init__(data_root=data_root, nproc=nproc)
super().__init__(**kwargs)
def parse_files(self, files: Tuple, split: str) -> Dict:
def parse_files(self, img_dir: str, ann_path: str) -> List:
"""Parse annotations."""
closeset_lines = list_from_file(files)
closeset_lines = list_from_file(ann_path)
samples = list()
for line in closeset_lines:
instances = list()
line = json.loads(line)
img_file = osp.join(self.data_root, line['file_name'])
img_file = osp.join(img_dir, osp.basename(line['file_name']))
for anno in line['annotations']:
poly = anno['box']
text = anno['text']
@ -72,21 +69,23 @@ class WildreceiptKIEAnnParser(BaseParser):
]}
Args:
data_root (str): The root path of the dataset.
ignore (int): The label to be ignored. Defaults to 0.
nproc (int): The number of processes to parse the annotation. Defaults
to 1.
"""
def __init__(self,
data_root: str,
ignore: int = 0,
nproc: int = 1) -> None:
def __init__(self, ignore: int = 0, **kwargs) -> None:
self.ignore = ignore
super().__init__(data_root=data_root, nproc=nproc)
super().__init__(**kwargs)
def parse_files(self, files: Tuple, split: str) -> Dict:
def parse_files(self, img_dir: str, ann_path: str) -> List:
"""Parse annotations."""
closeset_lines = list_from_file(files)
closeset_lines = list_from_file(ann_path)
samples = list()
for line in closeset_lines:
json_line = json.loads(line)
img_file = osp.join(img_dir, osp.basename(json_line['file_name']))
json_line['file_name'] = img_file
samples.append(json.dumps(json_line))
return closeset_lines
return samples

View File

@ -19,6 +19,7 @@ from .polygon_utils import (boundary_iou, crop_polygon, is_poly_inside_rect,
poly_union, polys2shapely, rescale_polygon,
rescale_polygons, shapely2poly, sort_points,
sort_vertex, sort_vertex8)
from .processing import track_parallel_progress_multi_args
from .setup_env import register_all_modules
from .string_utils import StringStripper
from .transform_utils import remove_pipeline_elements
@ -48,5 +49,6 @@ __all__ = [
'OptTensor', 'ColorType', 'OptKIESampleList', 'KIESampleList',
'is_archive', 'check_integrity', 'list_files', 'get_md5', 'InstanceList',
'LabelList', 'OptInstanceList', 'OptLabelList', 'RangeType',
'remove_pipeline_elements', 'bezier2poly', 'poly2bezier'
'remove_pipeline_elements', 'bezier2poly', 'poly2bezier',
'track_parallel_progress_multi_args'
]

View File

@ -0,0 +1,67 @@
# Copyright (c) OpenMMLab. All rights reserved.
import sys
from collections.abc import Iterable
from mmengine.utils.progressbar import ProgressBar, init_pool
def track_parallel_progress_multi_args(func,
args,
nproc,
initializer=None,
initargs=None,
bar_width=50,
chunksize=1,
skip_first=False,
file=sys.stdout):
"""Track the progress of parallel task execution with a progress bar.
The built-in :mod:`multiprocessing` module is used for process pools and
tasks are done with :func:`Pool.map` or :func:`Pool.imap_unordered`.
Args:
func (callable): The function to be applied to each task.
args (tuple[Iterable]): A tuple of iterables of arguments; func is
called with one element taken from each iterable per task.
nproc (int): Process (worker) number.
initializer (None or callable): Refer to :class:`multiprocessing.Pool`
for details.
initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for
details.
chunksize (int): Refer to :class:`multiprocessing.Pool` for details.
bar_width (int): Width of progress bar.
skip_first (bool): Whether to skip the first sample for each worker
when estimating fps, since the initialization step may take
longer.
Returns:
list: The task results.
"""
assert isinstance(args, tuple)
for arg in args:
assert isinstance(arg, Iterable)
assert len(set([len(arg)
for arg in args])) == 1, 'args must have same length'
task_num = len(args[0])
tasks = zip(*args)
pool = init_pool(nproc, initializer, initargs)
start = not skip_first
task_num -= nproc * chunksize * int(skip_first)
prog_bar = ProgressBar(task_num, bar_width, start, file=file)
results = []
gen = pool.starmap(func, tasks, chunksize)
for result in gen:
results.append(result)
if skip_first:
if len(results) < nproc * chunksize:
continue
elif len(results) == nproc * chunksize:
prog_bar.start()
continue
prog_bar.update()
prog_bar.file.write('\n')
pool.close()
pool.join()
return results
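A usage sketch (the function is exported via mmocr.utils, as the __init__ diff above shows): func is called with one element from each iterable per task, so the iterables must have equal length (and support len()):

def area(width, height):
    return width * height

results = track_parallel_progress_multi_args(
    area, ([2, 3, 4], [5, 6, 7]), nproc=2)
# results == [10, 18, 28]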

View File

@ -1,24 +0,0 @@
Name: Dummy Dataset
Paper:
Title: Dummy Dataset
URL: https://github.com/open-mmlab/mmocr
Venue: MMOCR
Year: 2022
BibTeX: ''
Data:
Website: https://github.com/open-mmlab/mmocr
Language:
- English
- Chinese
Scene:
- Natural Scene
Granularity:
- Word
Tasks:
- textdet
- textrecog
- textspotting
License:
Type: CC BY 4.0
Link: https://creativecommons.org/licenses/by/4.0/
Format: .txt

View File

@ -1,3 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
data_root = 'tests/data/preparer/dummy'
cache_path = 'tests/data/preparer/dummy'

View File

@ -0,0 +1,59 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
from mmocr.datasets.preparers import TextDetConfigGenerator
class TestTextDetConfigGenerator(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
def test_textdet_config_generator(self):
config_generator = TextDetConfigGenerator(
data_root=self.root.name,
dataset_name='dummy',
train_anns=[
dict(ann_file='textdet_train.json', dataset_postfix='')
],
val_anns=[],
test_anns=[
dict(ann_file='textdet_test.json', dataset_postfix='fake')
],
config_path=self.root.name,
)
cfg_path = osp.join(self.root.name, 'textdet', '_base_', 'datasets',
'dummy.py')
config_generator()
self.assertTrue(osp.exists(cfg_path))
f = open(cfg_path, 'r')
lines = ''.join(f.readlines())
self.assertEquals(
lines, (f"dummy_textdet_data_root = '{self.root.name}'\n"
'\n'
'dummy_textdet_train = dict(\n'
" type='OCRDataset',\n"
' data_root=dummy_textdet_data_root,\n'
" ann_file='textdet_train.json',\n"
' filter_cfg=dict(filter_empty_gt=True, min_size=32),\n'
' pipeline=None)\n'
'\n'
'dummy_fake_textdet_test = dict(\n'
" type='OCRDataset',\n"
' data_root=dummy_textdet_data_root,\n'
" ann_file='textdet_test.json',\n"
' test_mode=True,\n'
' pipeline=None)\n'))
with self.assertRaises(ValueError):
TextDetConfigGenerator(
data_root=self.root.name,
dataset_name='dummy',
train_anns=[
dict(ann_file='textdet_train.json', dataset_postfix='1'),
dict(ann_file='textdet_train_1.json', dataset_postfix='1')
],
config_path=self.root.name,
)

View File

@ -0,0 +1,59 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
from mmocr.datasets.preparers import TextRecogConfigGenerator
class TestTextRecogConfigGenerator(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
def test_textrecog_config_generator(self):
config_generator = TextRecogConfigGenerator(
data_root=self.root.name,
dataset_name='dummy',
train_anns=[
dict(ann_file='textrecog_train.json', dataset_postfix='')
],
val_anns=[],
test_anns=[
dict(ann_file='textrecog_test.json', dataset_postfix='fake')
],
config_path=self.root.name,
)
cfg_path = osp.join(self.root.name, 'textrecog', '_base_', 'datasets',
'dummy.py')
config_generator()
self.assertTrue(osp.exists(cfg_path))
f = open(cfg_path, 'r')
lines = ''.join(f.readlines())
self.assertEquals(lines,
(f"dummy_textrecog_data_root = '{self.root.name}'\n"
'\n'
'dummy_textrecog_train = dict(\n'
" type='OCRDataset',\n"
' data_root=dummy_textrecog_data_root,\n'
" ann_file='textrecog_train.json',\n"
' pipeline=None)\n'
'\n'
'dummy_fake_textrecog_test = dict(\n'
" type='OCRDataset',\n"
' data_root=dummy_textrecog_data_root,\n'
" ann_file='textrecog_test.json',\n"
' test_mode=True,\n'
' pipeline=None)\n'))
with self.assertRaises(ValueError):
TextRecogConfigGenerator(
data_root=self.root.name,
dataset_name='dummy',
train_anns=[
dict(ann_file='textrecog_train.json', dataset_postfix='1'),
dict(
ann_file='textrecog_train_1.json', dataset_postfix='1')
],
config_path=self.root.name,
)

View File

@ -0,0 +1,64 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
from mmocr.datasets.preparers import TextSpottingConfigGenerator
class TestTextSpottingConfigGenerator(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
def test_textspotting_config_generator(self):
config_generator = TextSpottingConfigGenerator(
data_root=self.root.name,
dataset_name='dummy',
train_anns=[
dict(ann_file='textspotting_train.json', dataset_postfix='')
],
val_anns=[],
test_anns=[
dict(
ann_file='textspotting_test.json', dataset_postfix='fake')
],
config_path=self.root.name,
)
cfg_path = osp.join(self.root.name, 'textspotting', '_base_',
'datasets', 'dummy.py')
config_generator()
self.assertTrue(osp.exists(cfg_path))
f = open(cfg_path, 'r')
lines = ''.join(f.readlines())
self.assertEquals(
lines, (f"dummy_textspotting_data_root = '{self.root.name}'\n"
'\n'
'dummy_textspotting_train = dict(\n'
" type='OCRDataset',\n"
' data_root=dummy_textspotting_data_root,\n'
" ann_file='textspotting_train.json',\n"
' filter_cfg=dict(filter_empty_gt=True, min_size=32),\n'
' pipeline=None)\n'
'\n'
'dummy_fake_textspotting_test = dict(\n'
" type='OCRDataset',\n"
' data_root=dummy_textspotting_data_root,\n'
" ann_file='textspotting_test.json',\n"
' test_mode=True,\n'
' pipeline=None)\n'))
with self.assertRaises(ValueError):
TextSpottingConfigGenerator(
data_root=self.root.name,
dataset_name='dummy',
train_anns=[
dict(
ann_file='textspotting_train.json',
dataset_postfix='1'),
dict(
ann_file='textspotting_train_1.json',
dataset_postfix='1')
],
config_path=self.root.name,
)

View File

@ -1,15 +1,60 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import unittest
from mmocr.datasets.preparers.data_preparer import DatasetPreparer
from mmengine import Config
from mmocr.datasets.preparers import DatasetPreparer
from mmocr.datasets.preparers.data_preparer import (CFG_GENERATORS,
DATA_DUMPERS,
DATA_GATHERERS,
DATA_OBTAINERS,
DATA_PACKERS, DATA_PARSERS)
class Fake:
def __init__(self, *args, **kwargs):
pass
def __call__(self, *args, **kwargs):
return None, None
DATA_OBTAINERS.register_module(module=Fake)
DATA_GATHERERS.register_module(module=Fake)
DATA_PARSERS.register_module(module=Fake)
DATA_DUMPERS.register_module(module=Fake)
DATA_PACKERS.register_module(module=Fake)
CFG_GENERATORS.register_module(module=Fake)
class TestDataPreparer(unittest.TestCase):
def setUp(self) -> None:
self.cfg_path = 'tests/data/preparer'
self.dataset_name = 'dummy'
def _create_config(self):
cfg_path = 'config.py'
cfg = ''
cfg += "data_root = ''\n"
cfg += 'train_preparer=dict(\n'
cfg += ' obtainer=dict(type="Fake"),\n'
cfg += ' gatherer=dict(type="Fake"),\n'
cfg += ' parser=dict(type="Fake"),\n'
cfg += ' packer=dict(type="Fake"),\n'
cfg += ' dumper=dict(type="Fake"),\n'
cfg += ')\n'
cfg += 'test_preparer=dict(\n'
cfg += ' obtainer=dict(type="Fake"),\n'
cfg += ')\n'
cfg += 'cfg_generator=dict(type="Fake")\n'
cfg += f"delete = ['{cfg_path}']\n"
with open(cfg_path, 'w') as f:
f.write(cfg)
return cfg_path
def test_dataset_preparer(self):
preparer = DatasetPreparer(self.cfg_path, self.dataset_name, 'textdet')
preparer()
cfg_path = self._create_config()
cfg = Config.fromfile(cfg_path)
preparer = DatasetPreparer.from_file(cfg)
preparer.run()
self.assertFalse(osp.exists(cfg_path))

View File

@ -21,8 +21,8 @@ class TestDumpers(unittest.TestCase):
task_name='textdet',
category=[dict(id=0, name='text')]))
dumper = JsonDumper(task)
dumper.dump(fake_data, self.root.name, split)
dumper = JsonDumper(task, split, self.root.name)
dumper.dump(fake_data)
with open(osp.join(self.root.name, f'{task}_{split}.json'), 'r') as f:
data = json.load(f)
self.assertEqual(data, fake_data)
@ -31,8 +31,8 @@ class TestDumpers(unittest.TestCase):
task, split = 'kie', 'train'
fake_data = ['test1', 'test2']
dumper = WildreceiptOpensetDumper(task)
dumper.dump(fake_data, self.root.name, split)
dumper = WildreceiptOpensetDumper(task, split, self.root.name)
dumper.dump(fake_data)
with open(osp.join(self.root.name, f'openset_{split}.txt'), 'r') as f:
data = f.read().splitlines()
self.assertEqual(data, fake_data)

View File

@ -0,0 +1,24 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import unittest
from mmocr.datasets.preparers.gatherers import MonoGatherer
class TestMonoGatherer(unittest.TestCase):
def test_mono_text_gatherer(self):
data_root = 'dummy'
img_dir = 'dummy_img'
ann_dir = 'dummy_ann'
ann_name = 'dummy_ann.json'
split = 'train'
gatherer = MonoGatherer(
data_root=data_root,
img_dir=img_dir,
ann_dir=ann_dir,
ann_name=ann_name,
split=split)
gather_img_dir, ann_path = gatherer()
self.assertEqual(gather_img_dir, osp.join(data_root, img_dir))
self.assertEqual(ann_path, osp.join(data_root, ann_dir, ann_name))

View File

@ -0,0 +1,42 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
import unittest
import cv2
import numpy as np
from mmocr.datasets.preparers.gatherers import PairGatherer
class TestPairGatherer(unittest.TestCase):
def test_pair_text_gatherer(self):
root = tempfile.TemporaryDirectory()
data_root = root.name
img_dir = 'dummy_img'
ann_dir = 'dummy_ann'
split = 'train'
img = np.random.randint(0, 100, size=(100, 100, 3))
os.makedirs(osp.join(data_root, img_dir))
os.makedirs(osp.join(data_root, ann_dir))
for i in range(10):
cv2.imwrite(osp.join(data_root, img_dir, f'img_{i}.jpg'), img)
f = open(osp.join(data_root, ann_dir, f'img_{i}.txt'), 'w')
f.close()
f = open(osp.join(data_root, ann_dir, 'img_10.mmocr'), 'w')
f.close()
gatherer = PairGatherer(
data_root=data_root,
img_dir=img_dir,
ann_dir=ann_dir,
split=split,
img_suffixes=['.jpg'],
rule=[r'img_(\d+)\.([jJ][pP][gG])', r'img_\1.txt'])
img_list, ann_list = gatherer()
self.assertEqual(len(img_list), 10)
self.assertEqual(len(ann_list), 10)
self.assertNotIn(
osp.join(data_root, ann_dir, 'img_10.mmocr'), ann_list)
root.cleanup()

View File

@ -0,0 +1,62 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
import cv2
import numpy as np
from mmocr.datasets.preparers import TextDetPacker
class TestTextDetPacker(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
img = np.random.randint(0, 255, (30, 20, 3), dtype=np.uint8)
cv2.imwrite(osp.join(self.root.name, 'test_img.jpg'), img)
self.instance = [{
'poly': [0, 0, 0, 10, 10, 20, 20, 0],
'ignore': False
}, {
'box': [0, 0, 10, 20],
'ignore': False
}]
self.img_path = osp.join(self.root.name, 'test_img.jpg')
self.sample = (self.img_path, self.instance)
def test_pack_instance(self):
packer = TextDetPacker(data_root=self.root.name, split='test')
instance = packer.pack_instance(self.sample)
self.assertEquals(instance['img_path'], 'test_img.jpg')
self.assertEquals(instance['height'], 30)
self.assertEquals(instance['width'], 20)
self.assertEquals(instance['instances'][0]['polygon'],
[0, 0, 0, 10, 10, 20, 20, 0])
self.assertEquals(instance['instances'][0]['bbox'],
[float(x) for x in [0, 0, 20, 20]])
self.assertEquals(instance['instances'][0]['bbox_label'], 0)
self.assertEquals(instance['instances'][0]['ignore'], False)
self.assertEquals(instance['instances'][1]['polygon'],
[0.0, 0.0, 10.0, 0.0, 10.0, 20.0, 0.0, 20.0])
self.assertEquals(instance['instances'][1]['bbox'],
[float(x) for x in [0, 0, 10, 20]])
self.assertEquals(instance['instances'][1]['bbox_label'], 0)
self.assertEquals(instance['instances'][1]['ignore'], False)
def test_add_meta(self):
packer = TextDetPacker(data_root=self.root.name, split='test')
instance = packer.pack_instance(self.sample)
meta = packer.add_meta(instance)
self.assertDictEqual(
meta['metainfo'], {
'dataset_type': 'TextDetDataset',
'task_name': 'textdet',
'category': [{
'id': 0,
'name': 'text'
}]
})
def tearDown(self) -> None:
self.root.cleanup()

View File

@ -0,0 +1,79 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
import cv2
import numpy as np
from mmocr.datasets.preparers import TextRecogCropPacker, TextRecogPacker
class TestTextRecogPacker(unittest.TestCase):
def test_pack_instance(self):
packer = TextRecogPacker(data_root='', split='test')
sample = ('test.jpg', 'text')
results = packer.pack_instance(sample)
self.assertDictEqual(
results,
dict(
img_path=osp.join('textrecog_imgs', 'test', 'test.jpg'),
instances=[dict(text='text')]))
def test_add_meta(self):
packer = TextRecogPacker(data_root='', split='test')
sample = [dict(img_path='test.jpg', instances=[dict(text='text')])]
results = packer.add_meta(sample)
self.assertDictEqual(
results,
dict(
metainfo=dict(
dataset_type='TextRecogDataset', task_name='textrecog'),
data_list=sample))
class TestTextRecogCropPacker(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
img = np.random.randint(0, 255, (30, 40, 3), dtype=np.uint8)
cv2.imwrite(osp.join(self.root.name, 'test_img.jpg'), img)
self.instance = [{
'poly': [0, 0, 0, 10, 10, 20, 20, 0],
'ignore': False,
'text': 'text1'
}, {
'box': [0, 0, 10, 20],
'ignore': False,
'text': 'text2'
}]
self.img_path = osp.join(self.root.name, 'test_img.jpg')
self.sample = (self.img_path, self.instance)
def test_pack_instance(self):
packer = TextRecogCropPacker(data_root=self.root.name, split='test')
instance = packer.pack_instance(self.sample)
self.assertListEqual(instance, [
dict(
img_path=osp.join('textrecog_imgs', 'test', 'test_img_0.jpg'),
instances=[dict(text='text1')]),
dict(
img_path=osp.join('textrecog_imgs', 'test', 'test_img_1.jpg'),
instances=[dict(text='text2')])
])
def test_add_meta(self):
packer = TextRecogCropPacker(data_root=self.root.name, split='test')
instance = packer.pack_instance(self.sample)
results = packer.add_meta([instance])
self.assertDictEqual(
results,
dict(
metainfo=dict(
dataset_type='TextRecogDataset', task_name='textrecog'),
data_list=instance))
def tearDown(self) -> None:
self.root.cleanup()

View File

@ -0,0 +1,69 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
import cv2
import numpy as np
from mmocr.datasets.preparers import TextSpottingPacker
class TestTextSpottingPacker(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
img = np.random.randint(0, 255, (30, 20, 3), dtype=np.uint8)
cv2.imwrite(osp.join(self.root.name, 'test_img.jpg'), img)
self.instance = [{
'poly': [0, 0, 0, 10, 10, 20, 20, 0],
'ignore': False,
'text': 'text1'
}, {
'box': [0, 0, 10, 20],
'ignore': False,
'text': 'text2'
}]
self.img_path = osp.join(self.root.name, 'test_img.jpg')
self.sample = (self.img_path, self.instance)
def test_pack_instance(self):
packer = TextSpottingPacker(data_root=self.root.name, split='test')
instance = packer.pack_instance(self.sample)
self.assertEquals(instance['img_path'], 'test_img.jpg')
self.assertEquals(instance['height'], 30)
self.assertEquals(instance['width'], 20)
self.assertEquals(instance['instances'][0]['polygon'],
[0, 0, 0, 10, 10, 20, 20, 0])
self.assertEquals(instance['instances'][0]['bbox'],
[float(x) for x in [0, 0, 20, 20]])
self.assertEquals(instance['instances'][0]['bbox_label'], 0)
self.assertEquals(instance['instances'][0]['ignore'], False)
self.assertEquals(instance['instances'][0]['text'], 'text1')
self.assertEquals(instance['instances'][1]['polygon'],
[0.0, 0.0, 10.0, 0.0, 10.0, 20.0, 0.0, 20.0])
self.assertEquals(instance['instances'][1]['bbox'],
[float(x) for x in [0, 0, 10, 20]])
self.assertEquals(instance['instances'][1]['bbox_label'], 0)
self.assertEquals(instance['instances'][1]['ignore'], False)
self.assertEquals(instance['instances'][1]['text'], 'text2')
def test_add_meta(self):
packer = TextSpottingPacker(data_root=self.root.name, split='test')
instance = packer.pack_instance(self.sample)
meta = packer.add_meta(instance)
self.assertDictEqual(
meta, {
'metainfo': {
'dataset_type': 'TextSpottingDataset',
'task_name': 'textspotting',
'category': [{
'id': 0,
'name': 'text'
}]
},
'data_list': instance
})
def tearDown(self) -> None:
self.root.cleanup()

View File

@ -0,0 +1,56 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
import tempfile
import unittest
from mmocr.datasets.preparers import FUNSDTextDetAnnParser
class TestFUNSDTextDetAnnParser(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
def _create_fake_sample(self):
fake_sample = {
'form': [{
'box': [91, 279, 123, 294],
'text': 'Date:',
'label': 'question',
'words': [{
'box': [91, 279, 123, 294],
'text': 'Date:'
}],
'linking': [[0, 16]],
'id': 0
}, {
'box': [92, 310, 130, 324],
'text': 'From:',
'label': 'question',
'words': [{
'box': [92, 310, 130, 324],
'text': ''
}],
'linking': [[1, 22]],
'id': 1
}]
}
ann_path = osp.join(self.root.name, 'funsd.json')
with open(ann_path, 'w') as f:
json.dump(fake_sample, f)
return ann_path
def test_textdet_parsers(self):
ann_path = self._create_fake_sample()
parser = FUNSDTextDetAnnParser(split='train')
_, instances = parser.parse_file('fake.jpg', ann_path)
self.assertEqual(len(instances), 2)
self.assertEqual(instances[0]['text'], 'Date:')
self.assertEqual(instances[0]['ignore'], False)
self.assertEqual(instances[1]['ignore'], True)
self.assertListEqual(instances[0]['poly'],
[91, 279, 123, 279, 123, 294, 91, 294])
def tearDown(self) -> None:
self.root.cleanup()
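The second form entry above is deliberately given an empty word transcription, and the test expects it to be ignored. The rule the parser is being held to is that a word without usable text cannot supervise detection, i.e. something along the lines of this sketch (not the exact implementation):

# Hypothetical distillation of the FUNSD ignore rule exercised above.
def should_ignore(word_text: str) -> bool:
    return len(word_text.strip()) == 0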

View File

@ -35,9 +35,9 @@ class TestIC15Parsers(unittest.TestCase):
def test_textdet_parsers(self):
file = self._create_dummy_ic15_det()
parser = ICDARTxtTextDetAnnParser()
parser = ICDARTxtTextDetAnnParser(split='train')
img, instances = parser.parse_file(file, 'train')
img, instances = parser.parse_file(*file)
self.assertEqual(img, file[0])
self.assertEqual(len(instances), 4)
self.assertIn('poly', instances[0])
@ -48,12 +48,15 @@ class TestIC15Parsers(unittest.TestCase):
self.assertEqual(instances[3]['text'], '100,000')
def test_textrecog_parsers(self):
parser = ICDARTxtTextRecogAnnParser()
parser = ICDARTxtTextRecogAnnParser(split='train')
file = self._create_dummy_ic15_recog()
samples = parser.parse_files(file, 'train')
samples = parser.parse_files(self.root.name, file)
self.assertEqual(len(samples), 4)
img, text = samples[0]
self.assertEqual(img, 'word_1.png')
self.assertEqual(img, osp.join(self.root.name, 'word_1.png'))
self.assertEqual(text, 'Genaxis Theatre')
img, text = samples[3]
self.assertEqual(text, '62-,03')
def tearDown(self) -> None:
self.root.cleanup()
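This hunk illustrates the parser interface change made throughout this PR: the split is now fixed at construction time, parse_file takes the image path and annotation path as separate arguments, and parse_files takes the image directory first and returns absolute image paths. In sketch form, with ann_file standing in for a real annotation file:

# New calling convention, as exercised by the updated tests above.
parser = ICDARTxtTextRecogAnnParser(split='train')
samples = parser.parse_files('data/icdar2015/textrecog_imgs/train', ann_file)
# each sample is (absolute_img_path, text)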

View File

@ -0,0 +1,81 @@
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os.path as osp
import tempfile
import unittest
from mmocr.datasets.preparers import NAFAnnParser
class TestNAFAnnParser(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
def _create_fake_sample(self):
fake_sample = {
    'fieldBBs': [{
        'poly_points': [[1357, 322], [1636, 324], [1636, 402], [1357, 400]],
        'type': 'field',
        'id': 'f0',
        'isBlank': 1
    }, {
        'poly_points': [[1831, 352], [1908, 353], [1908, 427], [1830, 427]],
        'type': 'blank',
        'id': 'f1',
        'isBlank': 1
    }],
    'textBBs': [{
        'poly_points': [[1388, 80], [2003, 82], [2003, 133], [1388, 132]],
        'type': 'text',
        'id': 't0'
    }, {
        'poly_points': [[1065, 366], [1320, 366], [1320, 413], [1065, 412]],
        'type': 'text',
        'id': 't1'
    }],
    'imageFilename': '004173988_00005.jpg',
    'transcriptions': {
        'f0': '7/24',
        'f1': '9',
        't0': 'REGISTRY RETURN RECEIPT.',
        't1': 'Date of delivery',
    }
}
ann_path = osp.join(self.root.name, 'naf.json')
with open(ann_path, 'w') as f:
json.dump(fake_sample, f)
return ann_path
def test_parsers(self):
ann_path = self._create_fake_sample()
parser = NAFAnnParser(split='train')
_, instances = parser.parse_file('fake.jpg', ann_path)
self.assertEqual(len(instances), 3)
self.assertEqual(instances[0]['ignore'], False)
self.assertEqual(instances[1]['ignore'], False)
self.assertListEqual(instances[2]['poly'],
[1357, 322, 1636, 324, 1636, 402, 1357, 400])
parser = NAFAnnParser(split='train', det=False)
_, instances = parser.parse_file('fake.jpg', ann_path)
self.assertEqual(len(instances), 2)
self.assertEqual(instances[0]['text'], '7/24')
def tearDown(self) -> None:
self.root.cleanup()

View File

@ -0,0 +1,43 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
from mmocr.datasets.preparers import SROIETextDetAnnParser
from mmocr.utils import list_to_file
class TestSROIETextDetAnnParser(unittest.TestCase):
def setUp(self) -> None:
self.root = tempfile.TemporaryDirectory()
def _create_dummy_sroie_det(self):
fake_anno = [
'114,54,326,54,326,92,114,92,TAN CHAY YEE',
'60,119,300,119,300,136,60,136,###',
'100,139,267,139,267,162,100,162,ROC NO: 538358-H',
'83,163,277,163,277,183,83,183,NO 2 & 4, JALAN BAYU 4,',
]
ann_file = osp.join(self.root.name, 'sroie_det.txt')
list_to_file(ann_file, fake_anno)
return (osp.join(self.root.name, 'sroie_det.jpg'), ann_file)
def test_textdet_parsers(self):
file = self._create_dummy_sroie_det()
parser = SROIETextDetAnnParser(split='train')
img, instances = parser.parse_file(*file)
self.assertEqual(img, file[0])
self.assertEqual(len(instances), 4)
self.assertIn('poly', instances[0])
self.assertIn('text', instances[0])
self.assertIn('ignore', instances[0])
self.assertEqual(instances[0]['text'], 'TAN CHAY YEE')
self.assertEqual(instances[1]['ignore'], True)
self.assertEqual(instances[3]['text'], 'NO 2 & 4, JALAN BAYU 4,')
self.assertListEqual(instances[2]['poly'],
[100, 139, 267, 139, 267, 162, 100, 162])
def tearDown(self) -> None:
self.root.cleanup()
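As with the ICDAR-style parsers, SROIE marks illegible regions with the '###' token, and the assertion on instances[1] checks that the parser maps it to ignore=True. Roughly, assuming '###' is the parser's default ignore token:

# Hypothetical; the actual token may be configurable.
instance = dict(poly=poly, text=text, ignore=text == '###')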

View File

@ -38,11 +38,11 @@ class TestSVTParsers(unittest.TestCase):
return ann_file
def test_textdet_parsers(self):
parser = SVTTextDetAnnParser(self.root.name)
parser = SVTTextDetAnnParser(split='train')
file = self._create_dummy_svt_det()
samples = parser.parse_files(file, 'train')
samples = parser.parse_files(self.root.name, file)
self.assertEqual(len(samples), 1)
self.assertEqual(osp.basename(samples[0][0]), 'test.jpg')
self.assertEqual(samples[0][0], osp.join(self.root.name, 'test.jpg'))
self.assertEqual(len(samples[0][1]), 3)
self.assertEqual(samples[0][1][0]['text'], 'living')
self.assertEqual(samples[0][1][1]['text'], 'room')
@ -50,3 +50,6 @@ class TestSVTParsers(unittest.TestCase):
self.assertEqual(samples[0][1][0]['poly'],
[375, 253, 611, 253, 611, 328, 375, 328])
self.assertEqual(samples[0][1][0]['ignore'], False)
def tearDown(self) -> None:
self.root.cleanup()

View File

@ -24,9 +24,9 @@ class TestTTParsers(unittest.TestCase):
return (osp.join(self.root.name, 'tt_det.jpg'), ann_file)
def test_textdet_parsers(self):
parser = TotaltextTextDetAnnParser(self.root.name)
parser = TotaltextTextDetAnnParser(split='train')
file = self._create_dummy_tt_det()
img, instances = parser.parse_file(file, 'train')
img, instances = parser.parse_file(*file)
self.assertEqual(img, file[0])
self.assertEqual(len(instances), 3)
self.assertIn('poly', instances[0])
@ -34,3 +34,6 @@ class TestTTParsers(unittest.TestCase):
self.assertIn('ignore', instances[0])
self.assertEqual(instances[0]['text'], 'PERUNDING')
self.assertEqual(instances[2]['ignore'], True)
def tearDown(self) -> None:
self.root.cleanup()

View File

@ -39,8 +39,8 @@ class TestWildReceiptParsers(unittest.TestCase):
list_to_file(self.anno, fake_sample)
def test_textdet_parsers(self):
parser = WildreceiptTextDetAnnParser(self.root.name)
samples = parser.parse_files(self.anno, 'train')
parser = WildreceiptTextDetAnnParser(split='train')
samples = parser.parse_files(self.root.name, self.anno)
self.assertEqual(len(samples), 1)
self.assertEqual(osp.basename(samples[0][0]), 'test.jpg')
instances = samples[0][1]
@ -52,6 +52,9 @@ class TestWildReceiptParsers(unittest.TestCase):
self.assertEqual(instances[1]['ignore'], True)
def test_kie_parsers(self):
parser = WildreceiptKIEAnnParser(self.root.name)
samples = parser.parse_files(self.anno, 'train')
parser = WildreceiptKIEAnnParser(split='train')
samples = parser.parse_files(self.root.name, self.anno)
self.assertEqual(len(samples), 1)
def tearDown(self) -> None:
self.root.cleanup()

View File

@ -0,0 +1,30 @@
# Copyright (c) OpenMMLab. All rights reserved.
import unittest
from mmocr.utils import track_parallel_progress_multi_args
def func(a, b):
return a + b
class TestProcessing(unittest.TestCase):
def test_track_parallel_progress_multi_args(self):
args = ([1, 2, 3], [4, 5, 6])
results = track_parallel_progress_multi_args(func, args, nproc=1)
self.assertEqual(results, [5, 7, 9])
results = track_parallel_progress_multi_args(func, args, nproc=2)
self.assertEqual(results, [5, 7, 9])
with self.assertRaises(AssertionError):
track_parallel_progress_multi_args(func, 1, nproc=1)
with self.assertRaises(AssertionError):
track_parallel_progress_multi_args(func, ([1, 2], 1), nproc=1)
with self.assertRaises(AssertionError):
track_parallel_progress_multi_args(
func, ([1, 2], [1, 2, 3]), nproc=1)
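The test fixes the semantics of the new utility: args must be a tuple of equal-length lists, which are zipped into per-call argument groups and mapped through func, optionally across nproc workers. A pure-Python sketch of the asserted behaviour (parallelism and progress tracking omitted):

# Behavioural sketch only; the real helper delegates to mmengine's progress trackers.
def multi_args_sketch(func, args, nproc=1):
    assert isinstance(args, tuple) and all(isinstance(a, list) for a in args)
    assert len({len(a) for a in args}) == 1, 'argument lists must be equally long'
    return [func(*group) for group in zip(*args)]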

View File

@ -1,8 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import time
import warnings
from mmengine import Config
from mmocr.datasets.preparers import DatasetPreparer
@ -21,6 +24,11 @@ def parse_args():
choices=['textdet', 'textrecog', 'textspotting', 'kie'],
help='Task type. Options are "textdet", "textrecog", "textspotting"'
' and "kie".')
parser.add_argument(
'--splits',
default=['train', 'test', 'val'],
help='A list of the splits to prepare.',
nargs='+')
parser.add_argument(
'--overwrite-cfg',
action='store_true',
@ -36,6 +44,35 @@ def parse_args():
return args
def parse_meta(task: str, meta_path: str) -> None:
"""Parse meta file.
Args:
    task (str): Task name, e.g. "textdet".
    meta_path (str): Path to the dataset's metafile.
"""
try:
meta = Config.fromfile(meta_path)
except FileNotFoundError:
return
assert task in meta['Data']['Tasks'], \
f'Task {task} not supported!'
# License related
if meta['Data']['License']['Type']:
print(f"\033[1;33;40mDataset Name: {meta['Name']}")
print(f"License Type: {meta['Data']['License']['Type']}")
print(f"License Link: {meta['Data']['License']['Link']}")
print(f"BibTeX: {meta['Paper']['BibTeX']}\033[0m")
print('\033[1;31;43mMMOCR does not own the dataset. Using this '
'dataset you must accept the license provided by the owners, '
'and cite the corresponding papers appropriately.')
print('If you do not agree with the above license, please cancel '
'the progress immediately by pressing ctrl+c. Otherwise, '
'you are deemed to accept the terms and conditions.\033[0m')
for i in range(5):
print(f'{5-i}...')
time.sleep(1)
def main():
args = parse_args()
for dataset in args.datasets:
@ -43,13 +80,18 @@ def main():
warnings.warn(f'{dataset} is not supported yet. Please check '
'dataset zoo for supported datasets.')
continue
preparer = DatasetPreparer(
cfg_path=args.dataset_zoo_path,
dataset_name=dataset,
task=args.task,
nproc=args.nproc,
overwrite_cfg=args.overwrite_cfg)
preparer()
meta_path = osp.join(args.dataset_zoo_path, dataset, 'metafile.yml')
parse_meta(args.task, meta_path)
cfg_path = osp.join(args.dataset_zoo_path, dataset, args.task + '.py')
cfg = Config.fromfile(cfg_path)
if args.overwrite_cfg and cfg.get('config_generator',
None) is not None:
cfg.config_generator.overwrite = args.overwrite_cfg
cfg.nproc = args.nproc
cfg.task = args.task
cfg.dataset_name = dataset
preparer = DatasetPreparer.from_file(cfg)
preparer.run(args.splits)
if __name__ == '__main__':
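Under the refactor, the tool loads the dataset-zoo config for the chosen task, patches in the runtime options (nproc, task, dataset_name, overwrite flag) and delegates to DatasetPreparer.from_file, so a run is driven entirely by the config. Assuming the repo's usual tool path and the --nproc flag implied by args.nproc above, an invocation looks like:

python tools/dataset_converters/prepare_dataset.py cocotextv2 --task textdet --splits train val --nproc 8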