mirror of https://github.com/alibaba/EasyCV.git
Segementatin of update (#301)
parent
6036624cbc
commit
88d804a5dc
|
@ -1,4 +1,55 @@
|
|||
_base_ = ['./fcn_r50-d8_512x512_8xb4_60e_voc12aug.py']
|
||||
_base_ = ['configs/base.py']
|
||||
|
||||
# model settings
|
||||
num_classes = 21
|
||||
|
||||
# norm_cfg = dict(type='SyncBN', requires_grad=True) # multi gpus
|
||||
norm_cfg = dict(type='BN', requires_grad=True)
|
||||
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3, 4),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True,
|
||||
),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
num_convs=2,
|
||||
concat_input=True,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=num_classes,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=num_classes,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
|
||||
CLASSES = [
|
||||
'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
|
||||
|
@ -8,21 +59,102 @@ CLASSES = [
|
|||
|
||||
# dataset settings
|
||||
dataset_type = 'SegDataset'
|
||||
data_root = 'data/VOCdevkit/VOC2012/'
|
||||
data_type = 'SegSourceRaw'
|
||||
data_root = 'data/VOCdevkit/VOC2012'
|
||||
|
||||
train_img_root = data_root + 'JPEGImages'
|
||||
train_label_root = data_root + 'SegmentationClass'
|
||||
train_list_file = data_root + 'ImageSets/Segmentation/train.txt'
|
||||
|
||||
val_img_root = data_root + 'JPEGImages'
|
||||
val_label_root = data_root + 'SegmentationClass'
|
||||
val_list_file = data_root + 'ImageSets/Segmentation/val.txt'
|
||||
|
||||
test_batch_size = 2
|
||||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
img_scale = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='MMResize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='SegRandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
||||
dict(type='MMRandomFlip', flip_ratio=0.5),
|
||||
dict(type='MMPhotoMetricDistortion'),
|
||||
dict(type='MMNormalize', **img_norm_cfg),
|
||||
dict(type='MMPad', size=(512, 512)),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_semantic_seg'],
|
||||
meta_keys=('filename', 'ori_filename', 'ori_shape', 'img_shape',
|
||||
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
|
||||
'img_norm_cfg')),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='MMMultiScaleFlipAug',
|
||||
img_scale=img_scale,
|
||||
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='MMResize', keep_ratio=True),
|
||||
dict(type='MMRandomFlip'),
|
||||
dict(type='MMNormalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=('filename', 'ori_filename', 'ori_shape',
|
||||
'img_shape', 'pad_shape', 'scale_factor', 'flip',
|
||||
'flip_direction', 'img_norm_cfg')),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
imgs_per_gpu=4,
|
||||
workers_per_gpu=4,
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ignore_index=255,
|
||||
data_source=dict(
|
||||
_delete_=True,
|
||||
type='SegSourceRaw',
|
||||
type=data_type,
|
||||
img_root=train_img_root,
|
||||
label_root=train_label_root,
|
||||
split=train_list_file,
|
||||
classes=CLASSES),
|
||||
))
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
imgs_per_gpu=test_batch_size,
|
||||
ignore_index=255,
|
||||
type=dataset_type,
|
||||
data_source=dict(
|
||||
type=data_type,
|
||||
img_root=val_img_root,
|
||||
label_root=val_label_root,
|
||||
split=val_list_file,
|
||||
classes=CLASSES,
|
||||
),
|
||||
pipeline=test_pipeline))
|
||||
|
||||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
|
||||
optimizer_config = dict()
|
||||
|
||||
# learning policy
|
||||
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=True)
|
||||
|
||||
# runtime settings
|
||||
total_epochs = 60
|
||||
checkpoint_config = dict(interval=1)
|
||||
eval_config = dict(interval=1, gpu_collect=False)
|
||||
eval_pipelines = [
|
||||
dict(
|
||||
mode='test',
|
||||
evaluators=[
|
||||
dict(
|
||||
type='SegmentationEvaluator',
|
||||
classes=CLASSES,
|
||||
metric_names=['mIoU'])
|
||||
],
|
||||
)
|
||||
]
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
_base_ = ['configs/base.py']
|
||||
_base_ = 'configs/base.py'
|
||||
|
||||
CLASSES = [
|
||||
'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
|
||||
|
|
|
@ -1,5 +1,36 @@
|
|||
_base_ = './segformer_b0_coco.py'
|
||||
|
||||
CLASSES = [
|
||||
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
|
||||
'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
|
||||
'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
|
||||
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
|
||||
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
|
||||
'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
|
||||
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
|
||||
'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
|
||||
'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
|
||||
'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
|
||||
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
|
||||
'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
|
||||
'hair drier', 'toothbrush', 'banner', 'blanket', 'branch', 'bridge',
|
||||
'building-other', 'bush', 'cabinet', 'cage', 'cardboard', 'carpet',
|
||||
'ceiling-other', 'ceiling-tile', 'cloth', 'clothes', 'clouds', 'counter',
|
||||
'cupboard', 'curtain', 'desk-stuff', 'dirt', 'door-stuff', 'fence',
|
||||
'floor-marble', 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood',
|
||||
'flower', 'fog', 'food-other', 'fruit', 'furniture-other', 'grass',
|
||||
'gravel', 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat',
|
||||
'metal', 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net',
|
||||
'paper', 'pavement', 'pillow', 'plant-other', 'plastic', 'platform',
|
||||
'playingfield', 'railing', 'railroad', 'river', 'road', 'rock', 'roof',
|
||||
'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', 'snow',
|
||||
'solid-other', 'stairs', 'stone', 'straw', 'structural-other', 'table',
|
||||
'tent', 'textile-other', 'towel', 'tree', 'vegetable', 'wall-brick',
|
||||
'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone', 'wall-tile',
|
||||
'wall-wood', 'water-other', 'waterdrops', 'window-blind', 'window-other',
|
||||
'wood'
|
||||
]
|
||||
|
||||
model = dict(
|
||||
pretrained=
|
||||
'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth',
|
||||
|
@ -15,9 +46,11 @@ model = dict(
|
|||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
img_scale = (2048, 640)
|
||||
crop_size = (640, 640)
|
||||
train_pipeline = [
|
||||
dict(type='MMResize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)),
|
||||
dict(type='MMResize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='SegRandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='MMRandomFlip', flip_ratio=0.5),
|
||||
dict(type='MMPhotoMetricDistortion'),
|
||||
|
@ -34,7 +67,7 @@ train_pipeline = [
|
|||
test_pipeline = [
|
||||
dict(
|
||||
type='MMMultiScaleFlipAug',
|
||||
img_scale=(2048, 640),
|
||||
img_scale=img_scale,
|
||||
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
|
||||
flip=False,
|
||||
transforms=[
|
||||
|
@ -50,3 +83,50 @@ test_pipeline = [
|
|||
'flip_direction', 'img_norm_cfg')),
|
||||
])
|
||||
]
|
||||
|
||||
data_root = './data/coco_stuff164k/'
|
||||
# dataset settings
|
||||
data_type = 'SegSourceRaw'
|
||||
data_root = 'data/VOCdevkit/VOC2012'
|
||||
|
||||
train_img_root = data_root + 'JPEGImages'
|
||||
train_label_root = data_root + 'SegmentationClass'
|
||||
train_list_file = data_root + 'ImageSets/Segmentation/train.txt'
|
||||
|
||||
val_img_root = data_root + 'JPEGImages'
|
||||
val_label_root = data_root + 'SegmentationClass'
|
||||
val_list_file = data_root + 'ImageSets/Segmentation/val.txt'
|
||||
|
||||
test_batch_size = 2
|
||||
|
||||
data = dict(
|
||||
imgs_per_gpu=2,
|
||||
workers_per_gpu=2,
|
||||
train=dict(
|
||||
type='SegDataset',
|
||||
ignore_index=255,
|
||||
data_source=dict(
|
||||
type=data_type,
|
||||
img_suffix='.jpg',
|
||||
label_suffix='.png',
|
||||
img_root=train_img_root,
|
||||
label_root=train_label_root,
|
||||
split=train_list_file,
|
||||
classes=CLASSES,
|
||||
),
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
imgs_per_gpu=test_batch_size,
|
||||
ignore_index=255,
|
||||
type='SegDataset',
|
||||
data_source=dict(
|
||||
type=data_type,
|
||||
img_suffix='.jpg',
|
||||
label_suffix='.png',
|
||||
img_root=val_img_root,
|
||||
label_root=val_label_root,
|
||||
split=val_list_file,
|
||||
classes=CLASSES,
|
||||
),
|
||||
pipeline=test_pipeline),
|
||||
)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
_base_ = ['./upernet_r50_512x512_8xb4_60e_voc12aug.py']
|
||||
_base_ = ['configs/base.py']
|
||||
|
||||
CLASSES = [
|
||||
'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
|
||||
|
@ -6,23 +6,153 @@ CLASSES = [
|
|||
'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
|
||||
]
|
||||
|
||||
# model settings
|
||||
num_classes = 21
|
||||
# norm_cfg = dict(type='SyncBN', requires_grad=True) # multi gpus
|
||||
norm_cfg = dict(type='BN', requires_grad=True)
|
||||
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3, 4),
|
||||
dilations=(1, 1, 1, 1),
|
||||
strides=(1, 2, 2, 2),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True,
|
||||
),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=num_classes,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=21,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
|
||||
# dataset settings
|
||||
dataset_type = 'SegDataset'
|
||||
data_root = 'data/VOCdevkit/VOC2012/'
|
||||
data_type = 'SegSourceRaw'
|
||||
data_root = 'data/VOCdevkit/VOC2012'
|
||||
|
||||
train_img_root = data_root + 'JPEGImages'
|
||||
train_label_root = data_root + 'SegmentationClass'
|
||||
train_list_file = data_root + 'ImageSets/Segmentation/train.txt'
|
||||
|
||||
val_img_root = data_root + 'JPEGImages'
|
||||
val_label_root = data_root + 'SegmentationClass'
|
||||
val_list_file = data_root + 'ImageSets/Segmentation/val.txt'
|
||||
|
||||
test_batch_size = 2
|
||||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
img_scale = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='MMResize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='SegRandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
||||
dict(type='MMRandomFlip', flip_ratio=0.5),
|
||||
dict(type='MMPhotoMetricDistortion'),
|
||||
dict(type='MMNormalize', **img_norm_cfg),
|
||||
dict(type='MMPad', size=(512, 512)),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_semantic_seg'],
|
||||
meta_keys=('filename', 'ori_filename', 'ori_shape', 'img_shape',
|
||||
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
|
||||
'img_norm_cfg')),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='MMMultiScaleFlipAug',
|
||||
img_scale=img_scale,
|
||||
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='MMResize', keep_ratio=True),
|
||||
dict(type='MMRandomFlip'),
|
||||
dict(type='MMNormalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=('filename', 'ori_filename', 'ori_shape',
|
||||
'img_shape', 'pad_shape', 'scale_factor', 'flip',
|
||||
'flip_direction', 'img_norm_cfg')),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
imgs_per_gpu=4,
|
||||
workers_per_gpu=4,
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ignore_index=255,
|
||||
data_source=dict(
|
||||
_delete_=True,
|
||||
type='SegSourceRaw',
|
||||
type=data_type,
|
||||
img_root=train_img_root,
|
||||
label_root=train_label_root,
|
||||
split=train_list_file,
|
||||
classes=CLASSES),
|
||||
))
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
imgs_per_gpu=test_batch_size,
|
||||
ignore_index=255,
|
||||
type=dataset_type,
|
||||
data_source=dict(
|
||||
type=data_type,
|
||||
img_root=val_img_root,
|
||||
label_root=val_label_root,
|
||||
split=val_list_file,
|
||||
classes=CLASSES,
|
||||
),
|
||||
pipeline=test_pipeline))
|
||||
|
||||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
|
||||
optimizer_config = dict()
|
||||
|
||||
# learning policy
|
||||
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=True)
|
||||
|
||||
# runtime settings
|
||||
total_epochs = 60
|
||||
checkpoint_config = dict(interval=1)
|
||||
eval_config = dict(interval=1, gpu_collect=False)
|
||||
eval_pipelines = [
|
||||
dict(
|
||||
mode='test',
|
||||
evaluators=[
|
||||
dict(
|
||||
type='SegmentationEvaluator',
|
||||
classes=CLASSES,
|
||||
metric_names=['mIoU'])
|
||||
],
|
||||
)
|
||||
]
|
||||
|
|
|
@ -531,6 +531,12 @@ CONFIG_TEMPLATE_ZOO = {
|
|||
'FCOS_ITAG_EASY': 'configs/detection/fcos/fcos_r50_torch_1x_pai.py',
|
||||
'FCOS_COCO_EASY': 'configs/detection/fcos/fcos_r50_torch_1x_coco.py',
|
||||
|
||||
# segmentation
|
||||
'FCN_SEG': 'configs/segmentation/fcn/fcn_r50-d8_512x512_8xb4_60e_voc12.py',
|
||||
'UPERNET_SEG':
|
||||
'configs/segmentation/upernet/upernet_r50_512x512_8xb4_60e_voc12.py',
|
||||
'SEGFORMER_SEG': 'configs/segmentation/segformer/segformer_b5_coco.py',
|
||||
|
||||
# ssl
|
||||
'MOCO_R50_TFRECORD': 'configs/config_templates/moco_r50_tfrecord.py',
|
||||
'MOCO_R50_TFRECORD_OSS':
|
||||
|
|
Loading…
Reference in New Issue