mmocr/configs/textrecog/nrtr/nrtr_modality_toy.py

# Base config files referenced by this config: the default runtime settings
# and the NRTR recognition model definition.
_base_ = [
    '../../_base_/default_runtime.py',
    '../../_base_/recog_models/nrtr.py',
]

# optimizer: Adam with a learning rate of 1e-3; gradient clipping is disabled
# (grad_clip is None).
optimizer = {'type': 'Adam', 'lr': 1e-3}
optimizer_config = {'grad_clip': None}
# learning policy: 'step' policy with steps at 3 and 4, 6 epochs in total.
lr_config = {'policy': 'step', 'step': [3, 4]}
total_epochs = 6

# Per-channel mean/std values expanded into the NormalizeOCR step below.
img_norm_cfg = {
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
}
# Transform configs for training: load, color jitter, resize to a height of 32
# (width bounds 32..100, aspect ratio not kept), tensor conversion,
# normalization, and collection of the 'img' key plus meta keys.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'ColorJitter',
        'brightness': 0.4,
        'contrast': 0.4,
        'saturation': 0.4,
    },
    {
        'type': 'ResizeOCR',
        'height': 32,
        'min_width': 32,
        'max_width': 100,
        'keep_aspect_ratio': False,
    },
    {'type': 'ToTensorOCR'},
    {'type': 'NormalizeOCR', **img_norm_cfg},
    {
        'type': 'Collect',
        'keys': ['img'],
        # 'text' is included in the meta keys for training samples.
        'meta_keys': [
            'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'
        ],
    },
]
# Transform configs for testing: after loading, the remaining steps are nested
# inside a MultiRotateAugOCR entry configured with rotations of 0, 90 and 270
# degrees.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiRotateAugOCR',
        'rotate_degrees': [0, 90, 270],
        'transforms': [
            {
                'type': 'ResizeOCR',
                'height': 32,
                'min_width': 32,
                'max_width': 100,
                'keep_aspect_ratio': False,
            },
            {'type': 'ToTensorOCR'},
            {'type': 'NormalizeOCR', **img_norm_cfg},
            {
                'type': 'Collect',
                'keys': ['img'],
                # No 'text' entry here, unlike the training meta keys.
                'meta_keys': [
                    'filename', 'ori_shape', 'img_shape', 'valid_ratio'
                ],
            },
        ],
    },
]

# Dataset type and image directory shared by every dataset entry in this file.
dataset_type = 'OCRDataset'
img_prefix = 'tests/data/ocr_toy_dataset/imgs'
# First training set: annotations from a plain-text label file, loaded with
# HardDiskLoader (repeat=100).
train_anno_file1 = 'tests/data/ocr_toy_dataset/label.txt'
train1 = {
    'type': dataset_type,
    'img_prefix': img_prefix,
    'ann_file': train_anno_file1,
    'loader': {
        'type': 'HardDiskLoader',
        'repeat': 100,
        # Annotation lines are split on a single space; field 0 is mapped to
        # 'filename' and field 1 to 'text'.
        'parser': {
            'type': 'LineStrParser',
            'keys': ['filename', 'text'],
            'keys_idx': [0, 1],
            'separator': ' ',
        },
    },
    'pipeline': train_pipeline,
    'test_mode': False,
}

# Second training set: annotations from an lmdb file, loaded with LmdbLoader
# (repeat=100) and the same parser settings as the text-file dataset.
train_anno_file2 = 'tests/data/ocr_toy_dataset/label.lmdb'
train2 = {
    'type': dataset_type,
    'img_prefix': img_prefix,
    'ann_file': train_anno_file2,
    'loader': {
        'type': 'LmdbLoader',
        'repeat': 100,
        'parser': {
            'type': 'LineStrParser',
            'keys': ['filename', 'text'],
            'keys_idx': [0, 1],
            'separator': ' ',
        },
    },
    'pipeline': train_pipeline,
    'test_mode': False,
}

# Test set: the lmdb annotation file, loaded once (repeat=1), run through
# test_pipeline with test_mode enabled.
test_anno_file1 = 'tests/data/ocr_toy_dataset/label.lmdb'
test = {
    'type': dataset_type,
    'img_prefix': img_prefix,
    'ann_file': test_anno_file1,
    'loader': {
        'type': 'LmdbLoader',
        'repeat': 1,
        'parser': {
            'type': 'LineStrParser',
            'keys': ['filename', 'text'],
            'keys_idx': [0, 1],
            'separator': ' ',
        },
    },
    'pipeline': test_pipeline,
    'test_mode': True,
}

# Data settings: 16 samples and 2 workers per GPU. Training concatenates both
# training sets; validation and testing both use the single test set.
data = {
    'samples_per_gpu': 16,
    'workers_per_gpu': 2,
    'train': {'type': 'ConcatDataset', 'datasets': [train1, train2]},
    'val': {'type': 'ConcatDataset', 'datasets': [test]},
    'test': {'type': 'ConcatDataset', 'datasets': [test]},
}

# Evaluation settings: interval of 1, using the 'acc' metric.
evaluation = {'interval': 1, 'metric': 'acc'}
version1.0 (#57) * add sar, seg and other components * [feature]: add textsnake_drrg * documentation and dbnet related code * [feature]: add code for kie and textsnake config * [feature]: add CRNN and RobustScanner * Revert "documentation and dbnet related code" * [feature]: add textdet * [feature]: dbnet and docs * fix #9: [feature]: setting norms for contributing (#10) * fix #9: [feature]: setting norms for contributing * fix #9: [feature]: setting norms for contributing * fix #9: [feature]: setting norms for contributing * fix #9: [feature]: setting norms for contributing * fix #11: update docs (#12) * fix #11: update docs * fix #11: update datasets.md for kie * fix #13: update docs with toc * fix #13: link pr to issue * fix #13: rename section title * fix #13: rename section title (#16) * fix #17: update ckpt path of psenet (#18) * Enhance/synthtext pretrain (#20) * fix 19: add synthtext pretrained model * fix 19: setup.cfg linting * Format readme (#23) * Format readme Signed-off-by: lizz <lizz@sensetime.com> * try Signed-off-by: lizz <lizz@sensetime.com> * Remove redudant config link Signed-off-by: lizz <lizz@sensetime.com> * fix #21: refactor kie dataset & add show_results * fix #21: update sdmgr readme and config * fix #21: update readme of segocr * f-str Signed-off-by: lizz <lizz@sensetime.com> * format again Signed-off-by: lizz <lizz@sensetime.com> * Mkae sort_vertex public api Signed-off-by: lizz <lizz@sensetime.com> * fix #24: rm img_meta from inference (#25) * Fix typos (#26) * Fix typos Signed-off-by: lizz <lizz@sensetime.com> * Ohh Signed-off-by: lizz <lizz@sensetime.com> * [feature]: add nrtr (#28) * [feature]: add nrtr * Rename nrtr_top_dataset.py to nrtr_toy_dataset.py Co-authored-by: Hongbin Sun <hongbin306@gmail.com> * fix #29: update logo (#30) * Feature/iss 33 (#34) * fix #33: update dataset.md * fix #33: pytest for transformer related * Add Github CI Signed-off-by: lizz <lizz@sensetime.com> * rm old ci Signed-off-by: lizz <lizz@sensetime.com> 
* add contributing and code of conduct Signed-off-by: lizz <lizz@sensetime.com> * Fix ci Signed-off-by: lizz <lizz@sensetime.com> * fix Signed-off-by: lizz <lizz@sensetime.com> * fix Signed-off-by: lizz <lizz@sensetime.com> * Re-enable skipped test Signed-off-by: lizz <lizz@sensetime.com> * good contributing link Signed-off-by: lizz <lizz@sensetime.com> * Remove pytorch 1.3 Signed-off-by: lizz <lizz@sensetime.com> * Remove test dependency on tools Signed-off-by: lizz <lizz@sensetime.com> * fix #31: pytest pass * skip cuda Signed-off-by: lizz <lizz@sensetime.com> * try Signed-off-by: lizz <lizz@sensetime.com> * format Signed-off-by: lizz <lizz@sensetime.com> * again Signed-off-by: lizz <lizz@sensetime.com> * Revert "Remove pytorch 1.3" This reverts commit b8d65afea82a9ba9a5ee3315aa6816d21c137c91. * Revert me when rroi is moved to mmcv Signed-off-by: lizz <lizz@sensetime.com> * Revert "Revert "Remove pytorch 1.3"" This reverts commit 1629a64b9e5aecc5536698d988e7151e04c4772d. * Let it pass * fix #35: add nrtr readme; update nrtr config (#36) * fix #37: remove useless code (#38) * np.int -> np.int32 Signed-off-by: lizz <lizz@sensetime.com> * out_size -> output_size Signed-off-by: lizz <lizz@sensetime.com> * Add textdet unit tests (#43) * Fix #41: test fpn_cat * Fix #41: test fpn_cat * Fix #41: test fpn_cat * fix #40: add unit test for recog config, transforms, etc. 
(#44) * fix #45: remove useless (#46) * fix #47: add unit test for api (#48) * add Dockerfile (#50) * Textsnake tests (#51) * add textsnake unit tests * Remove usage of \ (#49) * Remove usage of \ Signed-off-by: lizz <lizz@sensetime.com> * rebase Signed-off-by: lizz <lizz@sensetime.com> * typos Signed-off-by: lizz <lizz@sensetime.com> * Remove test dependency on tools/ Signed-off-by: lizz <lizz@sensetime.com> * Remove usage of \ Signed-off-by: lizz <lizz@sensetime.com> * rebase Signed-off-by: lizz <lizz@sensetime.com> * typos Signed-off-by: lizz <lizz@sensetime.com> * Remove test dependency on tools/ Signed-off-by: lizz <lizz@sensetime.com> * typo Signed-off-by: lizz <lizz@sensetime.com> * KIE in keywords Signed-off-by: lizz <lizz@sensetime.com> * some renames Signed-off-by: lizz <lizz@sensetime.com> * kill isort skip Signed-off-by: lizz <lizz@sensetime.com> * aggregation discrimination Signed-off-by: lizz <lizz@sensetime.com> * aggregation discrimination Signed-off-by: lizz <lizz@sensetime.com> * tiny Signed-off-by: lizz <lizz@sensetime.com> * fix bug: model infer on cpu Co-authored-by: Hongbin Sun <hongbin306@gmail.com> * fix #52: update readme (#53) * fix #39: update crnn & robustscanner. (#54) * fix #55: update nrtr readme (#56) Co-authored-by: HolyCrap96 <theochan666@gmail.com> Co-authored-by: quincylin1 <quincylin.333@gmail.com> Co-authored-by: YueXy <yuexiaoyu@sensetime.com> Co-authored-by: yuexy <yuexy@users.noreply.github.com> Co-authored-by: jeffreykuang <kuangzhh@gmail.com> Co-authored-by: lizz <innerlee@users.noreply.github.com> Co-authored-by: lizz <lizz@sensetime.com> Co-authored-by: Theo Chan <46100303+HolyCrap96@users.noreply.github.com> 2021-04-06 22:56:33 +08:00			`_base_ = [`
			`'../../_base_/default_runtime.py',`
			`'../../_base_/recog_models/nrtr.py',`
			`]`

			`# optimizer`
			`optimizer = dict(type='Adam', lr=1e-3)`
			`optimizer_config = dict(grad_clip=None)`
			`# learning policy`
			`lr_config = dict(policy='step', step=[3, 4])`
			`total_epochs = 6`

			`img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])`
			`train_pipeline = [`
			`dict(type='LoadImageFromFile'),`
			`dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),`
			`dict(`
			`type='ResizeOCR',`
			`height=32,`
			`min_width=32,`
			`max_width=100,`
			`keep_aspect_ratio=False),`
			`dict(type='ToTensorOCR'),`
			`dict(type='NormalizeOCR', **img_norm_cfg),`
			`dict(`
			`type='Collect',`
			`keys=['img'],`
			`meta_keys=[`
			`'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio'`
			`]),`
			`]`
			`test_pipeline = [`
			`dict(type='LoadImageFromFile'),`
			`dict(`
			`type='MultiRotateAugOCR',`
			`rotate_degrees=[0, 90, 270],`
			`transforms=[`
			`dict(`
			`type='ResizeOCR',`
			`height=32,`
			`min_width=32,`
			`max_width=100,`
			`keep_aspect_ratio=False),`
			`dict(type='ToTensorOCR'),`
			`dict(type='NormalizeOCR', **img_norm_cfg),`
			`dict(`
			`type='Collect',`
			`keys=['img'],`
			`meta_keys=[`
			`'filename', 'ori_shape', 'img_shape', 'valid_ratio'`
			`]),`
			`])`
			`]`

			`dataset_type = 'OCRDataset'`
			`img_prefix = 'tests/data/ocr_toy_dataset/imgs'`
			`train_anno_file1 = 'tests/data/ocr_toy_dataset/label.txt'`
			`train1 = dict(`
			`type=dataset_type,`
			`img_prefix=img_prefix,`
			`ann_file=train_anno_file1,`
			`loader=dict(`
			`type='HardDiskLoader',`
			`repeat=100,`
			`parser=dict(`
			`type='LineStrParser',`
			`keys=['filename', 'text'],`
			`keys_idx=[0, 1],`
			`separator=' ')),`
			`pipeline=train_pipeline,`
			`test_mode=False)`

			`train_anno_file2 = 'tests/data/ocr_toy_dataset/label.lmdb'`
			`train2 = dict(`
			`type=dataset_type,`
			`img_prefix=img_prefix,`
			`ann_file=train_anno_file2,`
			`loader=dict(`
			`type='LmdbLoader',`
			`repeat=100,`
			`parser=dict(`
			`type='LineStrParser',`
			`keys=['filename', 'text'],`
			`keys_idx=[0, 1],`
			`separator=' ')),`
			`pipeline=train_pipeline,`
			`test_mode=False)`

			`test_anno_file1 = 'tests/data/ocr_toy_dataset/label.lmdb'`
			`test = dict(`
			`type=dataset_type,`
			`img_prefix=img_prefix,`
			`ann_file=test_anno_file1,`
			`loader=dict(`
			`type='LmdbLoader',`
			`repeat=1,`
			`parser=dict(`
			`type='LineStrParser',`
			`keys=['filename', 'text'],`
			`keys_idx=[0, 1],`
			`separator=' ')),`
			`pipeline=test_pipeline,`
			`test_mode=True)`

			`data = dict(`
			`samples_per_gpu=16,`
			`workers_per_gpu=2,`
			`train=dict(type='ConcatDataset', datasets=[train1, train2]),`
			`val=dict(type='ConcatDataset', datasets=[test]),`
			`test=dict(type='ConcatDataset', datasets=[test]))`

			`evaluation = dict(interval=1, metric='acc')`