diff --git a/configs/textrecog/_base_/datasets/mjsynth.py b/configs/textrecog/_base_/datasets/mjsynth.py index bd06bf1b..5f26e937 100644 --- a/configs/textrecog/_base_/datasets/mjsynth.py +++ b/configs/textrecog/_base_/datasets/mjsynth.py @@ -1,17 +1,7 @@ -mjsynth_textrecog_data_root = 'data/rec/Syn90k/' +mjsynth_textrecog_data_root = 'data/mjsynth' -mjsynth_textrecog_test = dict( +mjsynth_textrecog_train = dict( type='OCRDataset', data_root=mjsynth_textrecog_data_root, - data_prefix=dict(img_path='mnt/ramdisk/max/90kDICT32px'), - ann_file='train_labels.json', - test_mode=False, - pipeline=None) - -mjsynth_sub_textrecog_train = dict( - type='OCRDataset', - data_root=mjsynth_textrecog_data_root, - data_prefix=dict(img_path='mnt/ramdisk/max/90kDICT32px'), - ann_file='subset_train_labels.json', - test_mode=False, + ann_file='textrecog_train.json', pipeline=None) diff --git a/configs/textrecog/abinet/abinet-vision_20e_st-an_mj.py b/configs/textrecog/abinet/abinet-vision_20e_st-an_mj.py index 1690fc70..f6785a39 100644 --- a/configs/textrecog/abinet/abinet-vision_20e_st-an_mj.py +++ b/configs/textrecog/abinet/abinet-vision_20e_st-an_mj.py @@ -24,7 +24,7 @@ param_scheduler = [ # dataset settings train_list = [ - _base_.mjsynth_textrecog_test, _base_.synthtext_an_textrecog_train + _base_.mjsynth_textrecog_train, _base_.synthtext_an_textrecog_train ] test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, diff --git a/configs/textrecog/abinet/abinet_20e_st-an_mj.py b/configs/textrecog/abinet/abinet_20e_st-an_mj.py index 44190523..34bed99a 100644 --- a/configs/textrecog/abinet/abinet_20e_st-an_mj.py +++ b/configs/textrecog/abinet/abinet_20e_st-an_mj.py @@ -26,7 +26,7 @@ param_scheduler = [ # dataset settings train_list = [ - _base_.mjsynth_textrecog_test, _base_.synthtext_an_textrecog_train + _base_.mjsynth_textrecog_train, _base_.synthtext_an_textrecog_train ] test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, diff --git a/configs/textrecog/aster/aster_resnet45_6e_st_mj.py b/configs/textrecog/aster/aster_resnet45_6e_st_mj.py index e61f08b4..d68e4556 100644 --- a/configs/textrecog/aster/aster_resnet45_6e_st_mj.py +++ b/configs/textrecog/aster/aster_resnet45_6e_st_mj.py @@ -15,7 +15,7 @@ _base_ = [ # dataset settings train_list = [ - _base_.mjsynth_textrecog_test, + _base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train, ] test_list = [ diff --git a/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py b/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py index 5dffa009..3cb2223e 100644 --- a/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py +++ b/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py @@ -12,7 +12,7 @@ _base_ = [ '_base_crnn_mini-vgg.py', ] # dataset settings -train_list = [_base_.mjsynth_textrecog_test] +train_list = [_base_.mjsynth_textrecog_train] test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, _base_.svt_textrecog_test, _base_.svtp_textrecog_test, diff --git a/configs/textrecog/master/master_resnet31_12e_st_mj_sa.py b/configs/textrecog/master/master_resnet31_12e_st_mj_sa.py index 41411c0b..7ab66ab7 100644 --- a/configs/textrecog/master/master_resnet31_12e_st_mj_sa.py +++ b/configs/textrecog/master/master_resnet31_12e_st_mj_sa.py @@ -23,7 +23,7 @@ param_scheduler = [ # dataset settings train_list = [ - _base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train, + _base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train, _base_.synthtext_add_textrecog_train ] test_list = [ diff --git a/configs/textrecog/nrtr/nrtr_modality-transform_6e_st_mj.py b/configs/textrecog/nrtr/nrtr_modality-transform_6e_st_mj.py index 427e9285..a1e77843 100644 --- a/configs/textrecog/nrtr/nrtr_modality-transform_6e_st_mj.py +++ b/configs/textrecog/nrtr/nrtr_modality-transform_6e_st_mj.py @@ -20,7 +20,7 @@ param_scheduler = [ ] # dataset settings -train_list = [_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train] +train_list = [_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train] test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, _base_.svt_textrecog_test, _base_.svtp_textrecog_test, diff --git a/configs/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py b/configs/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py index eb9dfb9a..2d3f019d 100644 --- a/configs/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py +++ b/configs/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py @@ -20,7 +20,7 @@ param_scheduler = [ ] # dataset settings -train_list = [_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train] +train_list = [_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train] test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, _base_.svt_textrecog_test, _base_.svtp_textrecog_test, diff --git a/configs/textrecog/satrn/satrn_shallow_5e_st_mj.py b/configs/textrecog/satrn/satrn_shallow_5e_st_mj.py index d0998188..94251c92 100644 --- a/configs/textrecog/satrn/satrn_shallow_5e_st_mj.py +++ b/configs/textrecog/satrn/satrn_shallow_5e_st_mj.py @@ -13,7 +13,7 @@ _base_ = [ ] # dataset settings -train_list = [_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train] +train_list = [_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train] test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, _base_.svt_textrecog_test, _base_.svtp_textrecog_test, diff --git a/configs/textrecog/svtr/svtr-tiny_20e_st_mj.py b/configs/textrecog/svtr/svtr-tiny_20e_st_mj.py index 1f217d5b..fec2c2ae 100644 --- a/configs/textrecog/svtr/svtr-tiny_20e_st_mj.py +++ b/configs/textrecog/svtr/svtr-tiny_20e_st_mj.py @@ -41,7 +41,7 @@ param_scheduler = [ ] # dataset settings -train_list = [_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train] +train_list = [_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train] test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, _base_.svt_textrecog_test, _base_.svtp_textrecog_test, diff --git a/dataset_zoo/mjsynth/metafile.yml b/dataset_zoo/mjsynth/metafile.yml new file mode 100644 index 00000000..42491ae4 --- /dev/null +++ b/dataset_zoo/mjsynth/metafile.yml @@ -0,0 +1,37 @@ +Name: 'Synthetic Word Dataset (MJSynth/Syn90k)' +Paper: + Title: Reading Text in the Wild with Convolutional Neural Networks + URL: https://arxiv.org/pdf/1412.1842.pdf + Venue: International Journal of Computer Vision + Year: '2016' + BibTeX: '@InProceedings{Jaderberg14c, + author = "Max Jaderberg and Karen Simonyan and Andrea Vedaldi and Andrew Zisserman", + title = "Synthetic Data and Artificial Neural Networks for Natural Scene Text Recognition", + booktitle = "Workshop on Deep Learning, NIPS", + year = "2014", + } + + @Article{Jaderberg16, + author = "Max Jaderberg and Karen Simonyan and Andrea Vedaldi and Andrew Zisserman", + title = "Reading Text in the Wild with Convolutional Neural Networks", + journal = "International Journal of Computer Vision", + number = "1", + volume = "116", + pages = "1--20", + month = "jan", + year = "2016", + }' +Data: + Website: https://www.robots.ox.ac.uk/~vgg/data/text/ + Language: + - English + Scene: + - Synthesis + Granularity: + - Word + Tasks: + - textrecog + License: + Type: N/A + Link: N/A + Format: .txt diff --git a/dataset_zoo/mjsynth/sample_anno.md b/dataset_zoo/mjsynth/sample_anno.md new file mode 100644 index 00000000..fe44e878 --- /dev/null +++ b/dataset_zoo/mjsynth/sample_anno.md @@ -0,0 +1,6 @@ +**Text Recognition** + +```txt +./3000/7/182_slinking_71711.jpg 71711 +./3000/7/182_REMODELERS_64541.jpg 64541 +``` diff --git a/dataset_zoo/mjsynth/textrecog.py b/dataset_zoo/mjsynth/textrecog.py new file mode 100644 index 00000000..f54b7044 --- /dev/null +++ b/dataset_zoo/mjsynth/textrecog.py @@ -0,0 +1,40 @@ +data_root = 'data/mjsynth' +cache_path = 'data/cache' + +train_preparer = dict( + obtainer=dict( + type='NaiveDataObtainer', + cache_path=cache_path, + files=[ + dict( + url='https://thor.robots.ox.ac.uk/~vgg/data/text/' + 'mjsynth.tar.gz', + save_name='mjsynth.tar.gz', + md5='7bf2b60ad935eaf64e5b606f782d68e5', + split=['train'], + content=['image', 'annotation'], + mapping=[ + [ + 'mjsynth/mnt/ramdisk/max/90kDICT32px/*/', + 'textrecog_imgs/train/' + ], + [ + 'mjsynth/mnt/ramdisk/max/90kDICT32px/annotation.txt', + 'annotations/annotation.txt' + ] + ]), + ]), + gatherer=dict(type='MonoGatherer', ann_name='annotation.txt'), + parser=dict( + type='ICDARTxtTextRecogAnnParser', + separator=' ', + format='img text', + remove_strs=None), + packer=dict(type='TextRecogPacker'), + dumper=dict(type='JsonDumper'), +) + +delete = ['mjsynth', 'annotations'] + +config_generator = dict( + type='TextRecogConfigGenerator', data_root=data_root, test_anns=None) diff --git a/docs/en/user_guides/config.md b/docs/en/user_guides/config.md index 83358ace..c2573d84 100644 --- a/docs/en/user_guides/config.md +++ b/docs/en/user_guides/config.md @@ -291,7 +291,7 @@ For example, for text recognition tasks, Syn90k is used as the training set, whi ```Python # text recognition dataset configuration -mjsynth_textrecog_test = dict( +mjsynth_textrecog_train = dict( type='OCRDataset', data_root='data/rec/Syn90k/', data_prefix=dict(img_path='mnt/ramdisk/max/90kDICT32px'), @@ -374,7 +374,7 @@ train_dataloader = dict( sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type='ConcatDataset', - datasets=[mjsynth_textrecog_test], + datasets=[mjsynth_textrecog_train], pipeline=train_pipeline)) val_dataloader = dict( batch_size=1, diff --git a/docs/en/user_guides/dataset_prepare.md b/docs/en/user_guides/dataset_prepare.md index d4470322..865503a8 100644 --- a/docs/en/user_guides/dataset_prepare.md +++ b/docs/en/user_guides/dataset_prepare.md @@ -171,7 +171,7 @@ _base_ = [ # Import all dataset configurations you want to use ] # List of training datasets -train_list = [_base_.mjsynth_textrecog_test] +train_list = [_base_.mjsynth_textrecog_train] # List of testing datasets test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, _base_.svt_textrecog_test, diff --git a/docs/zh_cn/user_guides/config.md b/docs/zh_cn/user_guides/config.md index 4725998c..fd16af58 100644 --- a/docs/zh_cn/user_guides/config.md +++ b/docs/zh_cn/user_guides/config.md @@ -288,7 +288,7 @@ test_cfg = dict(type='TestLoop') ```Python # 识别数据集配置 -mjsynth_textrecog_test = dict( +mjsynth_textrecog_train = dict( type='OCRDataset', data_root='data/rec/Syn90k/', data_prefix=dict(img_path='mnt/ramdisk/max/90kDICT32px'), @@ -373,7 +373,7 @@ train_dataloader = dict( sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type='ConcatDataset', - datasets=[mjsynth_textrecog_test], + datasets=[mjsynth_textrecog_train], pipeline=train_pipeline)) val_dataloader = dict( batch_size=1, diff --git a/docs/zh_cn/user_guides/dataset_prepare.md b/docs/zh_cn/user_guides/dataset_prepare.md index 439a7528..86dc7a8b 100644 --- a/docs/zh_cn/user_guides/dataset_prepare.md +++ b/docs/zh_cn/user_guides/dataset_prepare.md @@ -171,7 +171,7 @@ _base_ = [ # 导入所有需要使用的数据集配置 ] # 训练集列表 -train_list = [_base_.mjsynth_textrecog_test] +train_list = [_base_.mjsynth_textrecog_train] # 测试集列表 test_list = [ _base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test, _base_.svt_textrecog_test, diff --git a/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py b/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py index e4ed1f9e..c781b9ef 100644 --- a/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py +++ b/mmocr/datasets/preparers/obtainers/naive_data_obtainer.py @@ -186,7 +186,8 @@ class NaiveDataObtainer: if '*' in src: mkdir_or_exist(dst) for f in glob.glob(src): - if not osp.exists(osp.join(dst, osp.basename(f))): + if not osp.exists( + osp.join(dst, osp.relpath(f, self.data_root))): shutil.move(f, dst) elif osp.exists(src) and not osp.exists(dst): diff --git a/mmocr/datasets/preparers/packers/textrecog_packer.py b/mmocr/datasets/preparers/packers/textrecog_packer.py index 0779fa86..6af70064 100644 --- a/mmocr/datasets/preparers/packers/textrecog_packer.py +++ b/mmocr/datasets/preparers/packers/textrecog_packer.py @@ -50,11 +50,8 @@ class TextRecogPacker(BasePacker): """ img_name, text = sample - # TODO: remove hard code - packed_instance = dict( - instances=[dict(text=text)], - img_path=osp.join('textrecog_imgs', self.split, - osp.basename(img_name))) + img_name = osp.relpath(img_name, self.data_root) + packed_instance = dict(instances=[dict(text=text)], img_path=img_name) return packed_instance diff --git a/mmocr/datasets/preparers/parsers/icdar_txt_parser.py b/mmocr/datasets/preparers/parsers/icdar_txt_parser.py index d420fd5d..e90d5d7b 100644 --- a/mmocr/datasets/preparers/parsers/icdar_txt_parser.py +++ b/mmocr/datasets/preparers/parsers/icdar_txt_parser.py @@ -122,6 +122,6 @@ class ICDARTxtTextRecogAnnParser(BaseParser): if text == self.ignore: continue img_name = anno['img'] - samples.append((osp.join(img_dir, osp.basename(img_name)), text)) + samples.append((osp.join(img_dir, img_name), text)) return samples diff --git a/tests/test_datasets/test_preparers/test_packers/test_textrecog_packer.py b/tests/test_datasets/test_preparers/test_packers/test_textrecog_packer.py index 54e68415..5e875af3 100644 --- a/tests/test_datasets/test_preparers/test_packers/test_textrecog_packer.py +++ b/tests/test_datasets/test_preparers/test_packers/test_textrecog_packer.py @@ -13,14 +13,11 @@ class TestTextRecogPacker(unittest.TestCase): def test_pack_instance(self): - packer = TextRecogPacker(data_root='', split='test') - sample = ('test.jpg', 'text') + packer = TextRecogPacker(data_root='data/test/', split='test') + sample = ('data/test/test.jpg', 'text') results = packer.pack_instance(sample) self.assertDictEqual( - results, - dict( - img_path=osp.join('textrecog_imgs', 'test', 'test.jpg'), - instances=[dict(text='text')])) + results, dict(img_path='test.jpg', instances=[dict(text='text')])) def test_add_meta(self): packer = TextRecogPacker(data_root='', split='test')