# mmocr/configs/textdet/maskrcnn/_base_mask-rcnn_resnet50_fp...

# Base config: the Mask R-CNN (r50 + FPN) model definition, referenced through
# the `mmdet::` prefix.
_base_ = ['mmdet::_base_/models/mask-rcnn_r50_fpn.py']

# Take the `model` entry out of the base config so that it can be customised
# and nested under the wrapper defined below, instead of being used as the
# top-level `model` itself.
# NOTE(review): as plain Python `_base_` is a list, so this line relies on the
# config loader exposing the loaded base config under the `_base_` name --
# this file is not meant to be executed directly.
mask_rcnn = _base_.pop('model')
# Adapt Mask R-CNN model to OCR task
# NOTE(review): only a few leaf keys are listed per component; this assumes
# `update` merges nested dicts into the base values rather than replacing
# them wholesale (as a plain `dict.update` would) -- confirm against the
# config dict type in use.
mask_rcnn.update(
    dict(
        # Turn off `pad_mask` in the data preprocessor.
        data_preprocessor=dict(pad_mask=False),
        rpn_head=dict(
            anchor_generator=dict(
                # A single anchor scale combined with five aspect ratios
                # ranging from 0.17 to 7.46.
                scales=[4], ratios=[0.17, 0.44, 1.13, 2.90, 7.46])),
        roi_head=dict(
            # Both RoI heads are configured for a single class.
            bbox_head=dict(num_classes=1),
            mask_head=dict(num_classes=1),
        )))

# Final model: the adapted detector config is handed to `MMDetWrapper` as
# `cfg`, with `text_repr_type='poly'`.
model = dict(type='MMDetWrapper', text_repr_type='poly', cfg=mask_rcnn)

# Training data pipeline: an ordered list of transform configs, each selected
# by its 'type' key.
train_pipeline = [
    # Read the image with colour mode 'color_ignore_orientation'.
    {
        'type': 'LoadImageFromFile',
        'color_type': 'color_ignore_orientation',
    },
    # Load polygon, bbox and label annotations.
    {
        'type': 'LoadOCRAnnotations',
        'with_polygon': True,
        'with_bbox': True,
        'with_label': True,
    },
    # Colour augmentation: the 'ColorJitter' op through TorchVisionWrapper.
    {
        'type': 'TorchVisionWrapper',
        'op': 'ColorJitter',
        'brightness': 32.0 / 255,
        'saturation': 0.5,
        'contrast': 0.5,
    },
    # Random resize around a (640, 640) scale with ratios in [1.0, 4.125],
    # keeping the aspect ratio.
    {
        'type': 'RandomResize',
        'scale': (640, 640),
        'ratio_range': (1.0, 4.125),
        'keep_ratio': True,
    },
    # Flip with probability 0.5.
    {'type': 'RandomFlip', 'prob': 0.5},
    # Random crop with a (640, 640) target size.
    {'type': 'TextDetRandomCrop', 'target_size': (640, 640)},
    # Hand the sample over to MMDet conventions, with poly2mask enabled.
    {'type': 'MMOCR2MMDet', 'poly2mask': True},
    # Pack the inputs, keeping the listed meta keys.
    {
        'type': 'mmdet.PackDetInputs',
        'meta_keys': (
            'img_path',
            'ori_shape',
            'img_shape',
            'flip',
            'scale_factor',
            'flip_direction',
        ),
    },
]

# Test data pipeline: an ordered list of transform configs, each selected by
# its 'type' key.
test_pipeline = [
    # Read the image with colour mode 'color_ignore_orientation'.
    {
        'type': 'LoadImageFromFile',
        'color_type': 'color_ignore_orientation',
    },
    # Resize to a (1920, 1920) scale, keeping the aspect ratio.
    {'type': 'Resize', 'scale': (1920, 1920), 'keep_ratio': True},
    # Load polygon, bbox and label annotations; this step is listed after
    # the resize.
    {
        'type': 'LoadOCRAnnotations',
        'with_polygon': True,
        'with_bbox': True,
        'with_label': True,
    },
    # Pack the inputs, keeping the listed meta keys.
    {
        'type': 'PackTextDetInputs',
        'meta_keys': (
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
        ),
    },
]