Segmentation update (#301)

2023-03-15 10:01:51 +08:00 · 2023-03-15 10:01:51 +08:00 · 88d804a5dc
parent 6036624cbc
commit 88d804a5dc
5 changed files with 361 additions and 13 deletions
--- a/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12.py
+++ b/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12.py
@ -1,4 +1,55 @@
-_base_ = ['./fcn_r50-d8_512x512_8xb4_60e_voc12aug.py']
+_base_ = ['configs/base.py']
+
+# model settings
+num_classes = 21
+
+# norm_cfg = dict(type='SyncBN', requires_grad=True)  # multi gpus
+norm_cfg = dict(type='BN', requires_grad=True)
+
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(1, 2, 3, 4),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True,
+    ),
+    decode_head=dict(
+        type='FCNHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        num_convs=2,
+        concat_input=True,
+        dropout_ratio=0.1,
+        num_classes=num_classes,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=num_classes,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))

 CLASSES = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
@ -8,21 +59,102 @@ CLASSES = [

 # dataset settings
 dataset_type = 'SegDataset'
-data_root = 'data/VOCdevkit/VOC2012/'
+data_type = 'SegSourceRaw'
+data_root = 'data/VOCdevkit/VOC2012/'  # keep '/': sub-paths are concatenated

 train_img_root = data_root + 'JPEGImages'
 train_label_root = data_root + 'SegmentationClass'
 train_list_file = data_root + 'ImageSets/Segmentation/train.txt'

+val_img_root = data_root + 'JPEGImages'
+val_label_root = data_root + 'SegmentationClass'
+val_list_file = data_root + 'ImageSets/Segmentation/val.txt'
+
+test_batch_size = 2
+
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+
+img_scale = (512, 512)
+train_pipeline = [
+    dict(type='MMResize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='SegRandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+    dict(type='MMRandomFlip', flip_ratio=0.5),
+    dict(type='MMPhotoMetricDistortion'),
+    dict(type='MMNormalize', **img_norm_cfg),
+    dict(type='MMPad', size=(512, 512)),
+    dict(type='DefaultFormatBundle'),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_semantic_seg'],
+        meta_keys=('filename', 'ori_filename', 'ori_shape', 'img_shape',
+                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                   'img_norm_cfg')),
+]
+test_pipeline = [
+    dict(
+        type='MMMultiScaleFlipAug',
+        img_scale=img_scale,
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='MMResize', keep_ratio=True),
+            dict(type='MMRandomFlip'),
+            dict(type='MMNormalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(
+                type='Collect',
+                keys=['img'],
+                meta_keys=('filename', 'ori_filename', 'ori_shape',
+                           'img_shape', 'pad_shape', 'scale_factor', 'flip',
+                           'flip_direction', 'img_norm_cfg')),
+        ])
+]
 data = dict(
+    imgs_per_gpu=4,
+    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ignore_index=255,
        data_source=dict(
-            _delete_=True,
-            type='SegSourceRaw',
+            type=data_type,
            img_root=train_img_root,
            label_root=train_label_root,
            split=train_list_file,
            classes=CLASSES),
-    ))
+        pipeline=train_pipeline),
+    val=dict(
+        imgs_per_gpu=test_batch_size,
+        ignore_index=255,
+        type=dataset_type,
+        data_source=dict(
+            type=data_type,
+            img_root=val_img_root,
+            label_root=val_label_root,
+            split=val_list_file,
+            classes=CLASSES,
+        ),
+        pipeline=test_pipeline))
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=True)
+
+# runtime settings
+total_epochs = 60
+checkpoint_config = dict(interval=1)
+eval_config = dict(interval=1, gpu_collect=False)
+eval_pipelines = [
+    dict(
+        mode='test',
+        evaluators=[
+            dict(
+                type='SegmentationEvaluator',
+                classes=CLASSES,
+                metric_names=['mIoU'])
+        ],
+    )
+]
--- a/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12aug.py
+++ b/configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12aug.py
@ -1,4 +1,4 @@
-_base_ = ['configs/base.py']
+_base_ = 'configs/base.py'

 CLASSES = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
--- a/configs/segmentation/segformer/segformer_b5_coco.py
+++ b/configs/segmentation/segformer/segformer_b5_coco.py
@ -1,5 +1,36 @@
 _base_ = './segformer_b0_coco.py'

+CLASSES = [
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush', 'banner', 'blanket', 'branch', 'bridge',
+    'building-other', 'bush', 'cabinet', 'cage', 'cardboard', 'carpet',
+    'ceiling-other', 'ceiling-tile', 'cloth', 'clothes', 'clouds', 'counter',
+    'cupboard', 'curtain', 'desk-stuff', 'dirt', 'door-stuff', 'fence',
+    'floor-marble', 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood',
+    'flower', 'fog', 'food-other', 'fruit', 'furniture-other', 'grass',
+    'gravel', 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat',
+    'metal', 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net',
+    'paper', 'pavement', 'pillow', 'plant-other', 'plastic', 'platform',
+    'playingfield', 'railing', 'railroad', 'river', 'road', 'rock', 'roof',
+    'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', 'snow',
+    'solid-other', 'stairs', 'stone', 'straw', 'structural-other', 'table',
+    'tent', 'textile-other', 'towel', 'tree', 'vegetable', 'wall-brick',
+    'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone', 'wall-tile',
+    'wall-wood', 'water-other', 'waterdrops', 'window-blind', 'window-other',
+    'wood'
+]
+
 model = dict(
    pretrained=
    'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth',
@ -15,9 +46,11 @@ model = dict(

 img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+
+img_scale = (2048, 640)
 crop_size = (640, 640)
 train_pipeline = [
-    dict(type='MMResize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)),
+    dict(type='MMResize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='SegRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='MMRandomFlip', flip_ratio=0.5),
    dict(type='MMPhotoMetricDistortion'),
@ -34,7 +67,7 @@ train_pipeline = [
 test_pipeline = [
    dict(
        type='MMMultiScaleFlipAug',
-        img_scale=(2048, 640),
+        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
@ -50,3 +83,50 @@ test_pipeline = [
                           'flip_direction', 'img_norm_cfg')),
        ])
 ]
+
+data_root = './data/coco_stuff164k/'  # NOTE(review): overridden below; confirm
+# dataset settings
+data_type = 'SegSourceRaw'
+data_root = 'data/VOCdevkit/VOC2012/'  # keep '/': sub-paths are concatenated
+
+train_img_root = data_root + 'JPEGImages'
+train_label_root = data_root + 'SegmentationClass'
+train_list_file = data_root + 'ImageSets/Segmentation/train.txt'
+
+val_img_root = data_root + 'JPEGImages'
+val_label_root = data_root + 'SegmentationClass'
+val_list_file = data_root + 'ImageSets/Segmentation/val.txt'
+
+test_batch_size = 2
+
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type='SegDataset',
+        ignore_index=255,
+        data_source=dict(
+            type=data_type,
+            img_suffix='.jpg',
+            label_suffix='.png',
+            img_root=train_img_root,
+            label_root=train_label_root,
+            split=train_list_file,
+            classes=CLASSES,
+        ),
+        pipeline=train_pipeline),
+    val=dict(
+        imgs_per_gpu=test_batch_size,
+        ignore_index=255,
+        type='SegDataset',
+        data_source=dict(
+            type=data_type,
+            img_suffix='.jpg',
+            label_suffix='.png',
+            img_root=val_img_root,
+            label_root=val_label_root,
+            split=val_list_file,
+            classes=CLASSES,
+        ),
+        pipeline=test_pipeline),
+)
--- a/configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12.py
+++ b/configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12.py
@ -1,4 +1,4 @@
-_base_ = ['./upernet_r50_512x512_8xb4_60e_voc12aug.py']
+_base_ = ['configs/base.py']

 CLASSES = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
@ -6,23 +6,153 @@ CLASSES = [
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
 ]

+# model settings
+num_classes = 21
+# norm_cfg = dict(type='SyncBN', requires_grad=True)  # multi gpus
+norm_cfg = dict(type='BN', requires_grad=True)
+
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet50_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(1, 2, 3, 4),
+        dilations=(1, 1, 1, 1),
+        strides=(1, 2, 2, 2),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True,
+    ),
+    decode_head=dict(
+        type='UPerHead',
+        in_channels=[256, 512, 1024, 2048],
+        in_index=[0, 1, 2, 3],
+        pool_scales=(1, 2, 3, 6),
+        channels=512,
+        dropout_ratio=0.1,
+        num_classes=num_classes,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=num_classes,  # shared with decode_head, not hard-coded
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
 # dataset settings
 dataset_type = 'SegDataset'
-data_root = 'data/VOCdevkit/VOC2012/'
+data_type = 'SegSourceRaw'
+data_root = 'data/VOCdevkit/VOC2012/'  # keep '/': sub-paths are concatenated

 train_img_root = data_root + 'JPEGImages'
 train_label_root = data_root + 'SegmentationClass'
 train_list_file = data_root + 'ImageSets/Segmentation/train.txt'

+val_img_root = data_root + 'JPEGImages'
+val_label_root = data_root + 'SegmentationClass'
+val_list_file = data_root + 'ImageSets/Segmentation/val.txt'
+
+test_batch_size = 2
+
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+
+img_scale = (512, 512)
+train_pipeline = [
+    dict(type='MMResize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='SegRandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+    dict(type='MMRandomFlip', flip_ratio=0.5),
+    dict(type='MMPhotoMetricDistortion'),
+    dict(type='MMNormalize', **img_norm_cfg),
+    dict(type='MMPad', size=(512, 512)),
+    dict(type='DefaultFormatBundle'),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_semantic_seg'],
+        meta_keys=('filename', 'ori_filename', 'ori_shape', 'img_shape',
+                   'pad_shape', 'scale_factor', 'flip', 'flip_direction',
+                   'img_norm_cfg')),
+]
+test_pipeline = [
+    dict(
+        type='MMMultiScaleFlipAug',
+        img_scale=img_scale,
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='MMResize', keep_ratio=True),
+            dict(type='MMRandomFlip'),
+            dict(type='MMNormalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(
+                type='Collect',
+                keys=['img'],
+                meta_keys=('filename', 'ori_filename', 'ori_shape',
+                           'img_shape', 'pad_shape', 'scale_factor', 'flip',
+                           'flip_direction', 'img_norm_cfg')),
+        ])
+]
 data = dict(
+    imgs_per_gpu=4,
+    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ignore_index=255,
        data_source=dict(
-            _delete_=True,
-            type='SegSourceRaw',
+            type=data_type,
            img_root=train_img_root,
            label_root=train_label_root,
            split=train_list_file,
            classes=CLASSES),
-    ))
+        pipeline=train_pipeline),
+    val=dict(
+        imgs_per_gpu=test_batch_size,
+        ignore_index=255,
+        type=dataset_type,
+        data_source=dict(
+            type=data_type,
+            img_root=val_img_root,
+            label_root=val_label_root,
+            split=val_list_file,
+            classes=CLASSES,
+        ),
+        pipeline=test_pipeline))
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=True)
+
+# runtime settings
+total_epochs = 60
+checkpoint_config = dict(interval=1)
+eval_config = dict(interval=1, gpu_collect=False)
+eval_pipelines = [
+    dict(
+        mode='test',
+        evaluators=[
+            dict(
+                type='SegmentationEvaluator',
+                classes=CLASSES,
+                metric_names=['mIoU'])
+        ],
+    )
+]
--- a/easycv/utils/config_tools.py
+++ b/easycv/utils/config_tools.py
@ -531,6 +531,12 @@ CONFIG_TEMPLATE_ZOO = {
    'FCOS_ITAG_EASY': 'configs/detection/fcos/fcos_r50_torch_1x_pai.py',
    'FCOS_COCO_EASY': 'configs/detection/fcos/fcos_r50_torch_1x_coco.py',

+    # segmentation
+    'FCN_SEG': 'configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12.py',
+    'UPERNET_SEG':
+    'configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12.py',
+    'SEGFORMER_SEG': 'configs/segmentation/segformer/segformer_b5_coco.py',
+
    # ssl
    'MOCO_R50_TFRECORD': 'configs/config_templates/moco_r50_tfrecord.py',
    'MOCO_R50_TFRECORD_OSS':