# Mirror of https://github.com/open-mmlab/mmocr.git
# 58 lines, 1.7 KiB, Python
_base_ = ['mmdet::_base_/models/mask-rcnn_r50_fpn.py']
|
|
|
|
# Take the ``model`` entry out of the inherited base config so it can be
# nested under the wrapper defined as ``model`` in this file.
# NOTE(review): this relies on the config loader binding ``_base_`` to the
# merged base config (a dict-like object) while this file is evaluated; as a
# plain Python list, ``_base_.pop('model')`` would raise -- confirm against
# the MMEngine version in use.
mask_rcnn = _base_.pop('model')
# Adapt Mask R-CNN model to OCR task
mask_rcnn.update(
    dict(
        data_preprocessor=dict(pad_mask=False),
        rpn_head=dict(
            anchor_generator=dict(
                # A single anchor scale combined with five aspect ratios.
                scales=[4],
                ratios=[0.17, 0.44, 1.13, 2.90, 7.46])),
        roi_head=dict(
            # Both heads are configured for one class.
            bbox_head=dict(num_classes=1),
            mask_head=dict(num_classes=1),
        )))

# Final model config: the adapted Mask R-CNN config is passed as ``cfg`` to
# the ``MMDetWrapper`` type, with ``text_repr_type`` set to ``'poly'``.
model = dict(type='MMDetWrapper', text_repr_type='poly', cfg=mask_rcnn)

# Training data pipeline: an ordered list of transform configs, each
# identified by its ``type`` key.
train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True,
    ),
    # Colour augmentation; ``op`` names the wrapped operation.
    dict(
        type='TorchVisionWrapper',
        op='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5,
        contrast=0.5),
    dict(
        type='RandomResize',
        scale=(640, 640),
        ratio_range=(1.0, 4.125),
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='TextDetRandomCrop', target_size=(640, 640)),
    # NOTE(review): presumably converts MMOCR-style annotations into the
    # format MMDetection expects, turning polygons into masks -- confirm.
    dict(type='MMOCR2MMDet', poly2mask=True),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
                   'scale_factor', 'flip_direction'))
]

# Test-time data pipeline: an ordered list of transform configs, each
# identified by its ``type`` key.
test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
    # NOTE(review): annotations are loaded after ``Resize``, presumably so
    # the ground truth is not rescaled with the image -- confirm.
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]