[Config] Simplify the Mask R-CNN config (#1391)

* update mask rcnn cfg
* update
2022-09-21 15:44:37 +08:00 · 2022-09-21 15:44:37 +08:00 · 1077ce4294
parent 0dd72f40f7
commit 1077ce4294
1 changed file with 13 additions and 130 deletions
--- a/configs/textdet/maskrcnn/_base_mask-rcnn_resnet50_fpn.py
+++ b/configs/textdet/maskrcnn/_base_mask-rcnn_resnet50_fpn.py
@@ -1,138 +1,21 @@
+_base_ = ['mmdet::_base_/models/mask-rcnn_r50_fpn.py']
+
 file_client_args = dict(backend='disk')

-model = dict(
-    type='MMDetWrapper',
-    text_repr_type='poly',
-    cfg=dict(
-        type='MaskRCNN',
-        data_preprocessor=dict(
-            type='DetDataPreprocessor',
-            mean=[123.675, 116.28, 103.53],
-            std=[58.395, 57.12, 57.375],
-            bgr_to_rgb=True,
-            pad_size_divisor=32),
-        backbone=dict(
-            type='ResNet',
-            depth=50,
-            num_stages=4,
-            out_indices=(0, 1, 2, 3),
-            frozen_stages=1,
-            norm_cfg=dict(type='BN', requires_grad=True),
-            norm_eval=True,
-            style='pytorch',
-            init_cfg=dict(
-                type='Pretrained', checkpoint='torchvision://resnet50')),
-        neck=dict(
-            type='FPN',
-            in_channels=[256, 512, 1024, 2048],
-            out_channels=256,
-            num_outs=5),
+mask_rcnn = _base_.pop('model')
+# Adapt Mask R-CNN model to OCR task
+mask_rcnn.update(
+    dict(
+        data_preprocessor=dict(pad_mask=False),
        rpn_head=dict(
-            type='RPNHead',
-            in_channels=256,
-            feat_channels=256,
            anchor_generator=dict(
-                type='AnchorGenerator',
-                scales=[4],
-                ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
-                strides=[4, 8, 16, 32, 64]),
-            bbox_coder=dict(
-                type='DeltaXYWHBBoxCoder',
-                target_means=[.0, .0, .0, .0],
-                target_stds=[1.0, 1.0, 1.0, 1.0]),
-            loss_cls=dict(
-                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
-            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+                scales=[4], ratios=[0.17, 0.44, 1.13, 2.90, 7.46])),
        roi_head=dict(
-            type='StandardRoIHead',
-            bbox_roi_extractor=dict(
-                type='SingleRoIExtractor',
-                roi_layer=dict(
-                    type='RoIAlign', output_size=7, sampling_ratio=0.),
-                out_channels=256,
-                featmap_strides=[4, 8, 16, 32]),
-            bbox_head=dict(
-                type='Shared2FCBBoxHead',
-                in_channels=256,
-                fc_out_channels=1024,
-                roi_feat_size=7,
-                num_classes=1,
-                bbox_coder=dict(
-                    type='DeltaXYWHBBoxCoder',
-                    target_means=[0., 0., 0., 0.],
-                    target_stds=[0.1, 0.1, 0.2, 0.2]),
-                reg_class_agnostic=False,
-                loss_cls=dict(
-                    type='CrossEntropyLoss',
-                    use_sigmoid=False,
-                    loss_weight=1.0),
-                loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
-            mask_roi_extractor=dict(
-                type='SingleRoIExtractor',
-                roi_layer=dict(
-                    type='RoIAlign', output_size=14, sampling_ratio=0.),
-                out_channels=256,
-                featmap_strides=[4, 8, 16, 32]),
-            mask_head=dict(
-                type='FCNMaskHead',
-                num_convs=4,
-                in_channels=256,
-                conv_out_channels=256,
-                num_classes=1,
-                loss_mask=dict(
-                    type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
-        # model training and testing settings
-        train_cfg=dict(
-            rpn=dict(
-                assigner=dict(
-                    type='MaxIoUAssigner',
-                    pos_iou_thr=0.7,
-                    neg_iou_thr=0.3,
-                    min_pos_iou=0.3,
-                    match_low_quality=True,
-                    ignore_iof_thr=-1),
-                sampler=dict(
-                    type='RandomSampler',
-                    num=256,
-                    pos_fraction=0.5,
-                    neg_pos_ub=-1,
-                    add_gt_as_proposals=False),
-                allowed_border=-1,
-                pos_weight=-1,
-                debug=False),
-            rpn_proposal=dict(
-                nms_pre=2000,
-                max_per_img=1000,
-                nms=dict(type='nms', iou_threshold=0.7),
-                min_bbox_size=0),
-            rcnn=dict(
-                assigner=dict(
-                    type='MaxIoUAssigner',
-                    pos_iou_thr=0.5,
-                    neg_iou_thr=0.5,
-                    min_pos_iou=0.5,
-                    match_low_quality=True,
-                    ignore_iof_thr=-1),
-                sampler=dict(
-                    type='RandomSampler',
-                    num=512,
-                    pos_fraction=0.25,
-                    neg_pos_ub=-1,
-                    add_gt_as_proposals=True),
-                mask_size=28,
-                pos_weight=-1,
-                debug=False)),
-        test_cfg=dict(
-            rpn=dict(
-                nms_pre=1000,
-                max_per_img=1000,
-                nms=dict(type='nms', iou_threshold=0.7),
-                min_bbox_size=0),
-            rcnn=dict(
-                score_thr=0.05,
-                nms=dict(type='nms', iou_threshold=0.5),
-                max_per_img=100,
-                mask_thr_binary=0.5))))
+            bbox_head=dict(num_classes=1),
+            mask_head=dict(num_classes=1),
+        )))
+
+model = dict(type='MMDetWrapper', text_repr_type='poly', cfg=mask_rcnn)

 train_pipeline = [
    dict(