mirror of
https://github.com/open-mmlab/mmocr.git
synced 2025-06-03 21:54:47 +08:00
* add sar, seg and other components * [feature]: add textsnake_drrg * documentation and dbnet related code * [feature]: add code for kie and textsnake config * [feature]: add CRNN and RobustScanner * Revert "documentation and dbnet related code" * [feature]: add textdet * [feature]: dbnet and docs * fix #9: [feature]: setting norms for contributing (#10) * fix #9: [feature]: setting norms for contributing * fix #9: [feature]: setting norms for contributing * fix #9: [feature]: setting norms for contributing * fix #9: [feature]: setting norms for contributing * fix #11: update docs (#12) * fix #11: update docs * fix #11: update datasets.md for kie * fix #13: update docs with toc * fix #13: link pr to issue * fix #13: rename section title * fix #13: rename section title (#16) * fix #17: update ckpt path of psenet (#18) * Enhance/synthtext pretrain (#20) * fix 19: add synthtext pretrained model * fix 19: setup.cfg linting * Format readme (#23) * Format readme Signed-off-by: lizz <lizz@sensetime.com> * try Signed-off-by: lizz <lizz@sensetime.com> * Remove redudant config link Signed-off-by: lizz <lizz@sensetime.com> * fix #21: refactor kie dataset & add show_results * fix #21: update sdmgr readme and config * fix #21: update readme of segocr * f-str Signed-off-by: lizz <lizz@sensetime.com> * format again Signed-off-by: lizz <lizz@sensetime.com> * Mkae sort_vertex public api Signed-off-by: lizz <lizz@sensetime.com> * fix #24: rm img_meta from inference (#25) * Fix typos (#26) * Fix typos Signed-off-by: lizz <lizz@sensetime.com> * Ohh Signed-off-by: lizz <lizz@sensetime.com> * [feature]: add nrtr (#28) * [feature]: add nrtr * Rename nrtr_top_dataset.py to nrtr_toy_dataset.py Co-authored-by: Hongbin Sun <hongbin306@gmail.com> * fix #29: update logo (#30) * Feature/iss 33 (#34) * fix #33: update dataset.md * fix #33: pytest for transformer related * Add Github CI Signed-off-by: lizz <lizz@sensetime.com> * rm old ci Signed-off-by: lizz <lizz@sensetime.com> * add 
contributing and code of conduct Signed-off-by: lizz <lizz@sensetime.com> * Fix ci Signed-off-by: lizz <lizz@sensetime.com> * fix Signed-off-by: lizz <lizz@sensetime.com> * fix Signed-off-by: lizz <lizz@sensetime.com> * Re-enable skipped test Signed-off-by: lizz <lizz@sensetime.com> * good contributing link Signed-off-by: lizz <lizz@sensetime.com> * Remove pytorch 1.3 Signed-off-by: lizz <lizz@sensetime.com> * Remove test dependency on tools Signed-off-by: lizz <lizz@sensetime.com> * fix #31: pytest pass * skip cuda Signed-off-by: lizz <lizz@sensetime.com> * try Signed-off-by: lizz <lizz@sensetime.com> * format Signed-off-by: lizz <lizz@sensetime.com> * again Signed-off-by: lizz <lizz@sensetime.com> * Revert "Remove pytorch 1.3" This reverts commit b8d65afea82a9ba9a5ee3315aa6816d21c137c91. * Revert me when rroi is moved to mmcv Signed-off-by: lizz <lizz@sensetime.com> * Revert "Revert "Remove pytorch 1.3"" This reverts commit 1629a64b9e5aecc5536698d988e7151e04c4772d. * Let it pass * fix #35: add nrtr readme; update nrtr config (#36) * fix #37: remove useless code (#38) * np.int -> np.int32 Signed-off-by: lizz <lizz@sensetime.com> * out_size -> output_size Signed-off-by: lizz <lizz@sensetime.com> * Add textdet unit tests (#43) * Fix #41: test fpn_cat * Fix #41: test fpn_cat * Fix #41: test fpn_cat * fix #40: add unit test for recog config, transforms, etc. 
(#44) * fix #45: remove useless (#46) * fix #47: add unit test for api (#48) * add Dockerfile (#50) * Textsnake tests (#51) * add textsnake unit tests * Remove usage of \ (#49) * Remove usage of \ Signed-off-by: lizz <lizz@sensetime.com> * rebase Signed-off-by: lizz <lizz@sensetime.com> * typos Signed-off-by: lizz <lizz@sensetime.com> * Remove test dependency on tools/ Signed-off-by: lizz <lizz@sensetime.com> * Remove usage of \ Signed-off-by: lizz <lizz@sensetime.com> * rebase Signed-off-by: lizz <lizz@sensetime.com> * typos Signed-off-by: lizz <lizz@sensetime.com> * Remove test dependency on tools/ Signed-off-by: lizz <lizz@sensetime.com> * typo Signed-off-by: lizz <lizz@sensetime.com> * KIE in keywords Signed-off-by: lizz <lizz@sensetime.com> * some renames Signed-off-by: lizz <lizz@sensetime.com> * kill isort skip Signed-off-by: lizz <lizz@sensetime.com> * aggregation discrimination Signed-off-by: lizz <lizz@sensetime.com> * aggregation discrimination Signed-off-by: lizz <lizz@sensetime.com> * tiny Signed-off-by: lizz <lizz@sensetime.com> * fix bug: model infer on cpu Co-authored-by: Hongbin Sun <hongbin306@gmail.com> * fix #52: update readme (#53) * fix #39: update crnn & robustscanner. (#54) * fix #55: update nrtr readme (#56) Co-authored-by: HolyCrap96 <theochan666@gmail.com> Co-authored-by: quincylin1 <quincylin.333@gmail.com> Co-authored-by: YueXy <yuexiaoyu@sensetime.com> Co-authored-by: yuexy <yuexy@users.noreply.github.com> Co-authored-by: jeffreykuang <kuangzhh@gmail.com> Co-authored-by: lizz <innerlee@users.noreply.github.com> Co-authored-by: lizz <lizz@sensetime.com> Co-authored-by: Theo Chan <46100303+HolyCrap96@users.noreply.github.com>
161 lines
4.6 KiB
Python
161 lines
4.6 KiB
Python
_base_ = ['../../_base_/default_runtime.py']
|
|
|
|
# optimizer
|
|
optimizer = dict(type='Adam', lr=1e-4)
|
|
optimizer_config = dict(grad_clip=None)
|
|
# learning policy
|
|
lr_config = dict(policy='step', step=[3, 4])
|
|
total_epochs = 5
|
|
|
|
# Converts between text strings and per-character class labels for the
# segmentation-based recognizer (36-char dict: digits + lowercase letters).
label_convertor = dict(
    type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)

# Segmentation-based text recognizer: ResNet31 backbone -> FPN neck ->
# per-pixel classification head with a dedicated segmentation loss.
model = dict(
    type='SegRecognizer',
    backbone=dict(
        type='ResNet31OCR',
        layers=[1, 2, 5, 3],
        channels=[32, 64, 128, 256, 512, 512],
        out_indices=[0, 1, 2, 3],
        stage4_pool_cfg=dict(kernel_size=2, stride=2),
        last_stage_pool=True),
    neck=dict(
        type='FPNOCR', in_channels=[128, 256, 512, 512], out_channels=256),
    head=dict(
        type='SegHead',
        in_channels=256,
        upsample_param=dict(scale_factor=2.0, mode='nearest')),
    loss=dict(
        type='SegLoss', seg_downsample_ratio=1.0, seg_with_loss_weight=True),
    label_convertor=label_convertor)
|
|
|
|
# NOTE(review): presumably forwarded to DistributedDataParallel, where it is
# needed when some model parameters receive no gradient in an iteration —
# confirm against the training launcher.
find_unused_parameters = True
|
|
|
|
# Per-channel normalization statistics applied to input images.
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Convertor used when building the segmentation ground-truth targets; same
# settings as the model-side convertor above.
gt_label_convertor = dict(
    type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)

# Training pipeline: geometric augmentation on image + char boxes, resize,
# target generation, photometric augmentation, then tensor formatting.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomPaddingOCR',
        max_ratio=[0.15, 0.2, 0.15, 0.2],
        box_type='char_quads'),
    # Rotation of image together with character boxes is implemented on PIL
    # images, hence the OpenCV <-> PIL round trip.
    dict(type='OpencvToPil'),
    dict(
        type='RandomRotateImageBox',
        min_angle=-17,
        max_angle=17,
        box_type='char_quads'),
    dict(type='PilToOpencv'),
    dict(
        type='ResizeOCR',
        height=64,
        min_width=64,
        max_width=512,
        keep_aspect_ratio=True),
    # Build per-character segmentation maps from the char quads.
    dict(
        type='OCRSegTargets',
        label_convertor=gt_label_convertor,
        box_type='char_quads'),
    dict(type='RandomRotateTextDet', rotate_ratio=0.5, max_angle=15),
    dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
    dict(type='ToTensorOCR'),
    dict(type='FancyPCA'),
    dict(type='NormalizeOCR', **img_norm_cfg),
    dict(
        type='CustomFormatBundle',
        keys=['gt_kernels'],
        visualize=dict(flag=False, boundary_key=None),
        call_super=False),
    dict(
        type='Collect',
        keys=['img', 'gt_kernels'],
        meta_keys=['filename', 'ori_shape', 'img_shape'])
]
|
|
|
|
# Inference pipeline: no augmentation, no ground-truth targets; width is
# unconstrained (max_width=None) so the aspect ratio is fully preserved.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='ResizeOCR',
        height=64,
        min_width=64,
        max_width=None,
        keep_aspect_ratio=True),
    dict(type='ToTensorOCR'),
    dict(type='NormalizeOCR', **img_norm_cfg),
    dict(type='CustomFormatBundle', call_super=False),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=['filename', 'ori_shape', 'img_shape'])
]
|
|
|
|
# Training data: SynthText with line-level JSON annotations that include
# per-character boxes ('annotations') alongside the transcription ('text').
train_img_root = 'data/mixture/'

train_img_prefix = train_img_root + 'SynthText'
train_ann_file = train_img_root + 'SynthText/instances_train.txt'

train = dict(
    type='OCRSegDataset',
    img_prefix=train_img_prefix,
    ann_file=train_ann_file,
    loader=dict(
        type='HardDiskLoader',
        repeat=1,
        parser=dict(
            type='LineJsonParser', keys=['file_name', 'annotations', 'text'])),
    pipeline=train_pipeline,
    test_mode=False)
|
|
|
|
# Evaluation benchmarks: IIIT5K, SVT, ICDAR2013 (1015-image split), CT80.
dataset_type = 'OCRDataset'
test_prefix = 'data/mixture/'

test_img_prefix1 = test_prefix + 'IIIT5K/'
test_img_prefix2 = test_prefix + 'svt/'
test_img_prefix3 = test_prefix + 'icdar_2013/'
test_img_prefix4 = test_prefix + 'ct80/'

test_ann_file1 = test_prefix + 'IIIT5K/test_label.txt'
test_ann_file2 = test_prefix + 'svt/test_label.txt'
test_ann_file3 = test_prefix + 'icdar_2013/test_label_1015.txt'
test_ann_file4 = test_prefix + 'ct80/test_label.txt'
|
|
|
|
# Template evaluation dataset (IIIT5K); annotations are plain
# "<filename> <text>" lines, hence the LineStrParser.
test1 = dict(
    type=dataset_type,
    img_prefix=test_img_prefix1,
    ann_file=test_ann_file1,
    loader=dict(
        type='HardDiskLoader',
        repeat=1,
        parser=dict(
            type='LineStrParser',
            keys=['filename', 'text'],
            keys_idx=[0, 1],
            separator=' ')),
    pipeline=test_pipeline,
    test_mode=True)
|
|
|
|
# The remaining benchmarks reuse test1's loader/pipeline settings and only
# override the image prefix and annotation file. A shallow copy (same
# semantics as the original identity dict comprehension, ruff C416) is
# sufficient because only top-level keys are replaced.
test2 = test1.copy()
test2['img_prefix'] = test_img_prefix2
test2['ann_file'] = test_ann_file2

test3 = test1.copy()
test3['img_prefix'] = test_img_prefix3
test3['ann_file'] = test_ann_file3

test4 = test1.copy()
test4['img_prefix'] = test_img_prefix4
test4['ann_file'] = test_ann_file4
|
|
|
|
# Dataloader wiring: train on SynthText only; validate and test on the
# concatenation of the four real-image benchmarks.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=2,
    train=dict(type='ConcatDataset', datasets=[train]),
    val=dict(type='ConcatDataset', datasets=[test1, test2, test3, test4]),
    test=dict(type='ConcatDataset', datasets=[test1, test2, test3, test4]))
|
|
|
|
# Run evaluation with the 'acc' (recognition accuracy) metric every epoch.
evaluation = dict(interval=1, metric='acc')
|