From c88126f0d28ab5b2e70199ac3fc7ccd0cbc33ea7 Mon Sep 17 00:00:00 2001 From: Hongbin Sun Date: Wed, 7 Apr 2021 10:19:36 +0800 Subject: [PATCH] fix #60: update readme --- README.md | 4 +- configs/textrecog/nrtr/README.md | 4 +- configs/textrecog/nrtr/nrtr_modality_toy.py | 112 ------------ ...mic.py => nrtr_r31_1by16_1by8_academic.py} | 0 .../nrtr/nrtr_r31_1by8_1by4_academic.py | 163 ++++++++++++++++++ 5 files changed, 168 insertions(+), 115 deletions(-) delete mode 100644 configs/textrecog/nrtr/nrtr_modality_toy.py rename configs/textrecog/nrtr/{nrtr_r31_academic.py => nrtr_r31_1by16_1by8_academic.py} (100%) create mode 100644 configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py diff --git a/README.md b/README.md index 96f13097..92d8f382 100644 --- a/README.md +++ b/README.md @@ -43,12 +43,12 @@ This project is released under the [Apache 2.0 license](LICENSE). ## Changelog -v1.0 was released on 07/04/2021. +v0.1.0 was released on 07/04/2021. ## Benchmark and Model Zoo -Please refer to [modelzoo.md](modelzoo.md) for more details. +Please refer to [modelzoo.md](https://mmocr.readthedocs.io/en/latest/modelzoo.html) for more details. 
## Installation diff --git a/configs/textrecog/nrtr/README.md b/configs/textrecog/nrtr/README.md index 7d018559..011f76a9 100644 --- a/configs/textrecog/nrtr/README.md +++ b/configs/textrecog/nrtr/README.md @@ -54,8 +54,10 @@ | Methods | Backbone || Regular Text |||| Irregular Text ||download| | :-------: | :---------: | :----: | :----: | :--: | :-: | :--: | :------: | :--: | :-----: | | | | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | -| [NRTR](/configs/textrecog/nrtr/nrtr_r31_academic.py) | R31-1/16-1/8 | 93.9 | 90.0| 93.5 | | 74.5 | 78.5 | 86.5 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_academic_20210406-954db95e.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/20210406_010150.log.json) | +| [NRTR](/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py) | R31-1/16-1/8 | 93.9 | 90.0| 93.5 | | 74.5 | 78.5 | 86.5 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_academic_20210406-954db95e.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/20210406_010150.log.json) | +| [NRTR](/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py) | R31-1/8-1/4 | 94.7 | 87.5| 93.3 | | 75.1 | 78.9 | 87.9 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_1by8_1by4_academic_20210406-ce16e7cc.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/20210406_160845.log.json) | **Notes:** - `R31-1/16-1/8` means the height of feature from backbone is 1/16 of input image, where 1/8 for width. +- `R31-1/8-1/4` means the height of feature from backbone is 1/8 of input image, where 1/4 for width. 
diff --git a/configs/textrecog/nrtr/nrtr_modality_toy.py b/configs/textrecog/nrtr/nrtr_modality_toy.py deleted file mode 100644 index e8201c6f..00000000 --- a/configs/textrecog/nrtr/nrtr_modality_toy.py +++ /dev/null @@ -1,112 +0,0 @@ -_base_ = [ - '../../_base_/default_runtime.py', - '../../_base_/recog_models/nrtr.py', -] - -# optimizer -optimizer = dict(type='Adam', lr=1e-3) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict(policy='step', step=[3, 4]) -total_epochs = 6 - -img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), - dict( - type='ResizeOCR', - height=32, - min_width=32, - max_width=100, - keep_aspect_ratio=False), - dict(type='ToTensorOCR'), - dict(type='NormalizeOCR', **img_norm_cfg), - dict( - type='Collect', - keys=['img'], - meta_keys=[ - 'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio' - ]), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiRotateAugOCR', - rotate_degrees=[0, 90, 270], - transforms=[ - dict( - type='ResizeOCR', - height=32, - min_width=32, - max_width=100, - keep_aspect_ratio=False), - dict(type='ToTensorOCR'), - dict(type='NormalizeOCR', **img_norm_cfg), - dict( - type='Collect', - keys=['img'], - meta_keys=[ - 'filename', 'ori_shape', 'img_shape', 'valid_ratio' - ]), - ]) -] - -dataset_type = 'OCRDataset' -img_prefix = 'tests/data/ocr_toy_dataset/imgs' -train_anno_file1 = 'tests/data/ocr_toy_dataset/label.txt' -train1 = dict( - type=dataset_type, - img_prefix=img_prefix, - ann_file=train_anno_file1, - loader=dict( - type='HardDiskLoader', - repeat=100, - parser=dict( - type='LineStrParser', - keys=['filename', 'text'], - keys_idx=[0, 1], - separator=' ')), - pipeline=train_pipeline, - test_mode=False) - -train_anno_file2 = 'tests/data/ocr_toy_dataset/label.lmdb' -train2 = dict( - type=dataset_type, - 
img_prefix=img_prefix, - ann_file=train_anno_file2, - loader=dict( - type='LmdbLoader', - repeat=100, - parser=dict( - type='LineStrParser', - keys=['filename', 'text'], - keys_idx=[0, 1], - separator=' ')), - pipeline=train_pipeline, - test_mode=False) - -test_anno_file1 = 'tests/data/ocr_toy_dataset/label.lmdb' -test = dict( - type=dataset_type, - img_prefix=img_prefix, - ann_file=test_anno_file1, - loader=dict( - type='LmdbLoader', - repeat=1, - parser=dict( - type='LineStrParser', - keys=['filename', 'text'], - keys_idx=[0, 1], - separator=' ')), - pipeline=test_pipeline, - test_mode=True) - -data = dict( - samples_per_gpu=16, - workers_per_gpu=2, - train=dict(type='ConcatDataset', datasets=[train1, train2]), - val=dict(type='ConcatDataset', datasets=[test]), - test=dict(type='ConcatDataset', datasets=[test])) - -evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/nrtr/nrtr_r31_academic.py b/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py similarity index 100% rename from configs/textrecog/nrtr/nrtr_r31_academic.py rename to configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py diff --git a/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py b/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py new file mode 100644 index 00000000..b003b823 --- /dev/null +++ b/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py @@ -0,0 +1,163 @@ +_base_ = [ + '../../_base_/default_runtime.py', '../../_base_/recog_models/nrtr.py' +] + +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='NRTR', + backbone=dict( + type='ResNet31OCR', + layers=[1, 2, 5, 3], + channels=[32, 64, 128, 256, 512, 512], + stage4_pool_cfg=dict(kernel_size=(2, 1), stride=(2, 1)), + last_stage_pool=False), + encoder=dict(type='TFEncoder'), + decoder=dict(type='TFDecoder'), + loss=dict(type='TFLoss'), + label_convertor=label_convertor, + max_seq_len=40) + +# optimizer +optimizer = dict(type='Adam', lr=1e-3) 
+optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[3, 4]) +total_epochs = 6 + +img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=32, + min_width=32, + max_width=160, + keep_aspect_ratio=True, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio' + ]), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiRotateAugOCR', + rotate_degrees=[0, 90, 270], + transforms=[ + dict( + type='ResizeOCR', + height=32, + min_width=32, + max_width=160, + keep_aspect_ratio=True, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'valid_ratio' + ]), + ]) +] + +dataset_type = 'OCRDataset' + +train_prefix = 'data/mixture/' + +train_img_prefix1 = train_prefix + \ + 'SynthText/synthtext/SynthText_patch_horizontal' +train_img_prefix2 = train_prefix + 'Syn90k/mnt/ramdisk/max/90kDICT32px' + +train_ann_file1 = train_prefix + 'SynthText/label.lmdb' +train_ann_file2 = train_prefix + 'Syn90k/label.lmdb' + +train1 = dict( + type=dataset_type, + img_prefix=train_img_prefix1, + ann_file=train_ann_file1, + loader=dict( + type='LmdbLoader', + repeat=1, + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=train_pipeline, + test_mode=False) + +train2 = {key: value for key, value in train1.items()} +train2['img_prefix'] = train_img_prefix2 +train2['ann_file'] = train_ann_file2 + +test_prefix = 'data/mixture/' +test_img_prefix1 = test_prefix + 'IIIT5K/' +test_img_prefix2 = test_prefix + 'svt/' +test_img_prefix3 = test_prefix + 'icdar_2013/' 
+test_img_prefix4 = test_prefix + 'icdar_2015/' +test_img_prefix5 = test_prefix + 'svtp/' +test_img_prefix6 = test_prefix + 'ct80/' + +test_ann_file1 = test_prefix + 'IIIT5K/test_label.txt' +test_ann_file2 = test_prefix + 'svt/test_label.txt' +test_ann_file3 = test_prefix + 'icdar_2013/test_label_1015.txt' +test_ann_file4 = test_prefix + 'icdar_2015/test_label.txt' +test_ann_file5 = test_prefix + 'svtp/test_label.txt' +test_ann_file6 = test_prefix + 'ct80/test_label.txt' + +test1 = dict( + type=dataset_type, + img_prefix=test_img_prefix1, + ann_file=test_ann_file1, + loader=dict( + type='HardDiskLoader', + repeat=1, + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=test_pipeline, + test_mode=True) + +test2 = {key: value for key, value in test1.items()} +test2['img_prefix'] = test_img_prefix2 +test2['ann_file'] = test_ann_file2 + +test3 = {key: value for key, value in test1.items()} +test3['img_prefix'] = test_img_prefix3 +test3['ann_file'] = test_ann_file3 + +test4 = {key: value for key, value in test1.items()} +test4['img_prefix'] = test_img_prefix4 +test4['ann_file'] = test_ann_file4 + +test5 = {key: value for key, value in test1.items()} +test5['img_prefix'] = test_img_prefix5 +test5['ann_file'] = test_ann_file5 + +test6 = {key: value for key, value in test1.items()} +test6['img_prefix'] = test_img_prefix6 +test6['ann_file'] = test_ann_file6 + +data = dict( + samples_per_gpu=128, + workers_per_gpu=4, + train=dict(type='ConcatDataset', datasets=[train1, train2]), + val=dict( + type='ConcatDataset', + datasets=[test1, test2, test3, test4, test5, test6]), + test=dict( + type='ConcatDataset', + datasets=[test1, test2, test3, test4, test5, test6])) + +evaluation = dict(interval=1, metric='acc')