Add Pascal Context to mmsegmentation (#133)

* Add Pascal Context to mmsegmentation

* Add benchmark result to Pascal Context

* fix mmcv version

* fix code syntax

* fix code syntax again

* Update mmseg/models/segmentors/encoder_decoder.py

update hint

Co-authored-by: Jerry Jiarui XU <xvjiarui0826@gmail.com>

* update comment

* fix pascal context model path

* fix model path mistake again

* fix model path mistake again

* fix model path mistakes again

Co-authored-by: Jerry Jiarui XU <xvjiarui0826@gmail.com>
pull/145/head
yamengxi 2020-09-22 14:56:13 +08:00 committed by GitHub
parent 768c3ee650
commit a2738fd9be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 393 additions and 5 deletions

View File

@ -0,0 +1,60 @@
# Dataset settings for Pascal Context, rooted at the VOC2010 devkit folder.
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'
# Keyword arguments unpacked into every Normalize transform below.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
img_scale = (520, 520)
crop_size = (480, 480)
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations'},
    {'type': 'Resize', 'img_scale': img_scale, 'ratio_range': (0.5, 2.0)},
    {'type': 'RandomCrop', 'crop_size': crop_size, 'cat_max_ratio': 0.75},
    {'type': 'RandomFlip', 'flip_ratio': 0.5},
    {'type': 'PhotoMetricDistortion'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size': crop_size, 'pad_val': 0, 'seg_pad_val': 255},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_semantic_seg']},
]
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': img_scale,
        # 'img_ratios': [0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        'flip': False,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
# The val and test entries both read the val split list.
data = {
    'samples_per_gpu': 4,
    'workers_per_gpu': 4,
    'train': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'JPEGImages',
        'ann_dir': 'SegmentationClassContext',
        'split': 'ImageSets/SegmentationContext/train.txt',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'JPEGImages',
        'ann_dir': 'SegmentationClassContext',
        'split': 'ImageSets/SegmentationContext/val.txt',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'JPEGImages',
        'ann_dir': 'SegmentationClassContext',
        'split': 'ImageSets/SegmentationContext/val.txt',
        'pipeline': test_pipeline,
    },
}

View File

@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series
| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) |
| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) |
| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) |
### Pascal Context
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | 46.55 | 47.81 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) |
| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 46.42 | 47.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) |

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,9 @@
# Base model, dataset, runtime and 40k schedule configs, in that order.
_base_ = [
    '../_base_/models/deeplabv3_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Set the class count of both heads to 60.
model = {
    'decode_head': {'num_classes': 60},
    'auxiliary_head': {'num_classes': 60},
}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -0,0 +1,9 @@
# Base model, dataset, runtime and 80k schedule configs, in that order.
_base_ = [
    '../_base_/models/deeplabv3_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Set the class count of both heads to 60.
model = {
    'decode_head': {'num_classes': 60},
    'auxiliary_head': {'num_classes': 60},
}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series
| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) |
| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) |
| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) |
### Pascal Context
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | 47.30 | 48.47 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) |
| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 47.23 | 48.26 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) |

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,9 @@
# Base model, dataset, runtime and 40k schedule configs, in that order.
_base_ = [
    '../_base_/models/deeplabv3plus_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Set the class count of both heads to 60.
model = {
    'decode_head': {'num_classes': 60},
    'auxiliary_head': {'num_classes': 60},
}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -0,0 +1,9 @@
# Base model, dataset, runtime and 80k schedule configs, in that order.
_base_ = [
    '../_base_/models/deeplabv3plus_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Set the class count of both heads to 60.
model = {
    'decode_head': {'num_classes': 60},
    'auxiliary_head': {'num_classes': 60},
}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -43,3 +43,9 @@
| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) |
| FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
| FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) |
### Pascal Context
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | 44.14 | 45.67 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20200911_212515-9b565a6d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20200911_212515.log.json) |
| FCN | R-101-D8 | 480x480 | 80000 | - | - | 44.47 | 45.74 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20200915_032644-a3828480.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20200915_032644.log.json) |

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './fcn_r50-d8_480x480_40k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './fcn_r50-d8_480x480_80k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,7 @@
# Base model, dataset, runtime and 40k schedule configs, in that order.
_base_ = [
    '../_base_/models/fcn_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Only the decode head's class count is overridden here.
model = {'decode_head': {'num_classes': 60}}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -0,0 +1,7 @@
# Base model, dataset, runtime and 80k schedule configs, in that order.
_base_ = [
    '../_base_/models/fcn_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Only the decode head's class count is overridden here.
model = {'decode_head': {'num_classes': 60}}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -44,3 +44,9 @@
| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) |
| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) |
| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) |
### Pascal Context
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | 45.14 | 47.42 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) |
| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 45.84 | 47.84 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) |

View File

@ -0,0 +1,7 @@
# Base model, dataset, runtime and 40k schedule configs, in that order.
_base_ = [
    '../_base_/models/fcn_hr18.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Only the decode head's class count is overridden here.
model = {'decode_head': {'num_classes': 60}}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -0,0 +1,7 @@
# Base model, dataset, runtime and 80k schedule configs, in that order.
_base_ = [
    '../_base_/models/fcn_hr18.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Only the decode head's class count is overridden here.
model = {'decode_head': {'num_classes': 60}}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -0,0 +1,9 @@
# Overrides applied on top of the HR18 config referenced by ``_base_``:
# the w18-small checkpoint plus per-stage module/block counts.
_base_ = './fcn_hr18_480x480_40k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://msra/hrnetv2_w18_small',
    'backbone': {
        'extra': {
            'stage1': {'num_blocks': (2, )},
            'stage2': {'num_blocks': (2, 2)},
            'stage3': {'num_modules': 3, 'num_blocks': (2, 2, 2)},
            'stage4': {'num_modules': 2, 'num_blocks': (2, 2, 2, 2)},
        },
    },
}

View File

@ -0,0 +1,9 @@
# Overrides applied on top of the HR18 config referenced by ``_base_``:
# the w18-small checkpoint plus per-stage module/block counts.
_base_ = './fcn_hr18_480x480_80k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://msra/hrnetv2_w18_small',
    'backbone': {
        'extra': {
            'stage1': {'num_blocks': (2, )},
            'stage2': {'num_blocks': (2, 2)},
            'stage3': {'num_modules': 3, 'num_blocks': (2, 2, 2)},
            'stage4': {'num_modules': 2, 'num_blocks': (2, 2, 2, 2)},
        },
    },
}

View File

@ -0,0 +1,10 @@
# Overrides applied on top of the HR18 config referenced by ``_base_``:
# the w48 checkpoint and wider per-stage channels. The decode head's
# ``channels`` is the sum of its four input widths.
_base_ = './fcn_hr18_480x480_40k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://msra/hrnetv2_w48',
    'backbone': {
        'extra': {
            'stage2': {'num_channels': (48, 96)},
            'stage3': {'num_channels': (48, 96, 192)},
            'stage4': {'num_channels': (48, 96, 192, 384)},
        },
    },
    'decode_head': {
        'in_channels': [48, 96, 192, 384],
        'channels': sum([48, 96, 192, 384]),
    },
}

View File

@ -0,0 +1,10 @@
# Overrides applied on top of the HR18 config referenced by ``_base_``:
# the w48 checkpoint and wider per-stage channels. The decode head's
# ``channels`` is the sum of its four input widths.
_base_ = './fcn_hr18_480x480_80k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://msra/hrnetv2_w48',
    'backbone': {
        'extra': {
            'stage2': {'num_channels': (48, 96)},
            'stage3': {'num_channels': (48, 96, 192)},
            'stage4': {'num_channels': (48, 96, 192, 384)},
        },
    },
    'decode_head': {
        'in_channels': [48, 96, 192, 384],
        'channels': sum([48, 96, 192, 384]),
    },
}

View File

@ -39,3 +39,9 @@
| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) |
| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
### Pascal Context
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | 46.60 | 47.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) |
| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 46.03 | 47.15 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) |

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './pspnet_r50-d8_480x480_40k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,2 @@
# Overrides applied on top of the R-50 config referenced by ``_base_``:
# swap in the ResNet-101 checkpoint and backbone depth.
_base_ = './pspnet_r50-d8_480x480_80k_pascal_context.py'
model = {
    'pretrained': 'open-mmlab://resnet101_v1c',
    'backbone': {'depth': 101},
}

View File

@ -0,0 +1,9 @@
# Base model, dataset, runtime and 40k schedule configs, in that order.
_base_ = [
    '../_base_/models/pspnet_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Set the class count of both heads to 60.
model = {
    'decode_head': {'num_classes': 60},
    'auxiliary_head': {'num_classes': 60},
}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -0,0 +1,9 @@
# Base model, dataset, runtime and 80k schedule configs, in that order.
_base_ = [
    '../_base_/models/pspnet_r50-d8.py',
    '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Set the class count of both heads to 60.
model = {
    'decode_head': {'num_classes': 60},
    'auxiliary_head': {'num_classes': 60},
}
test_cfg = {'mode': 'slide', 'crop_size': (480, 480), 'stride': (320, 320)}
optimizer = {
    'type': 'SGD',
    'lr': 0.004,
    'momentum': 0.9,
    'weight_decay': 0.0001,
}

View File

@ -27,6 +27,14 @@ mmsegmentation
│ │ │ ├── SegmentationClass
│ │ │ ├── ImageSets
│ │ │ │ ├── Segmentation
│ │ ├── VOC2010
│ │ │ ├── JPEGImages
│ │ │ ├── SegmentationClassContext
│ │ │ ├── ImageSets
│ │ │ │ ├── SegmentationContext
│ │ │ │ │ ├── train.txt
│ │ │ │ │ ├── val.txt
│ │ │ ├── trainval_merged.json
│ │ ├── VOCaug
│ │ │ ├── dataset
│ │ │ │ ├── cls
@ -69,6 +77,17 @@ Please refer to [concat dataset](https://github.com/open-mmlab/mmsegmentation/bl
The training and validation sets of ADE20K can be downloaded from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
You may also download the test set from [here](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
### Pascal Context
The training and validation sets of Pascal Context can be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar). You may also download the test set from [here](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) after registration.
To split the training and validation sets from the original dataset, you may download trainval_merged.json from [here](https://codalabuser.blob.core.windows.net/public/trainval_merged.json).
If you would like to use the Pascal Context dataset, please install [Detail](https://github.com/ccvl/detail-api) and then run the following command to convert the annotations into the proper format.
```shell
python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
```
## Inference with pretrained models
We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),

View File

@ -3,10 +3,11 @@ from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
from .cityscapes import CityscapesDataset
from .custom import CustomDataset
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .pascal_context import PascalContextDataset
from .voc import PascalVOCDataset
__all__ = [
'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
'PascalVOCDataset', 'ADE20KDataset'
'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset'
]

View File

@ -0,0 +1,54 @@
import os.path as osp
from .builder import DATASETS
from .custom import CustomDataset
@DATASETS.register_module()
class PascalContextDataset(CustomDataset):
    """Pascal Context segmentation dataset.

    Label 0 in the annotation maps is the background class and counts as one
    of the 60 categories, so ``reduce_zero_label`` is always passed as
    ``False``. The image suffix is always '.jpg' and the segmentation map
    suffix is always '.png'.

    Args:
        split (str): Split txt file for PascalContext.
        **kwargs: Remaining keyword arguments, forwarded unchanged to
            ``CustomDataset.__init__``.
    """

    # Category names; one entry per label value.
    CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
               'bus', 'car', 'cat', 'chair', 'cow', 'table', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
               'tvmonitor', 'bag', 'bed', 'bench', 'book', 'building',
               'cabinet', 'ceiling', 'cloth', 'computer', 'cup', 'door',
               'fence', 'floor', 'flower', 'food', 'grass', 'ground',
               'keyboard', 'light', 'mountain', 'mouse', 'curtain', 'platform',
               'sign', 'plate', 'road', 'rock', 'shelves', 'sidewalk', 'sky',
               'snow', 'bedclothes', 'track', 'tree', 'truck', 'wall', 'water',
               'window', 'wood')

    # One three-component colour per entry of CLASSES, in the same order.
    PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
               [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
               [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
               [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
               [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
               [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
               [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
               [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
               [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
               [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
               [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
               [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
               [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
               [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
               [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]]

    def __init__(self, split, **kwargs):
        fixed_options = dict(
            img_suffix='.jpg',
            seg_map_suffix='.png',
            split=split,
            reduce_zero_label=False)
        super().__init__(**fixed_options, **kwargs)
        # Sanity checks: the image directory must exist and a split file
        # must have been supplied.
        assert osp.exists(self.img_dir)
        assert self.split is not None

View File

@ -167,13 +167,15 @@ class EncoderDecoder(BaseSegmentor):
# TODO refactor
def slide_inference(self, img, img_meta, rescale):
"""Inference by sliding-window with overlap."""
"""Inference by sliding-window with overlap.
If h_crop > h_img or w_crop > w_img, the small patch will be used to
decode without padding.
"""
h_stride, w_stride = self.test_cfg.stride
h_crop, w_crop = self.test_cfg.crop_size
batch_size, _, h_img, w_img = img.size()
assert h_crop <= h_img and w_crop <= w_img, (
'crop size should not greater than image size')
num_classes = self.num_classes
h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1

View File

@ -8,6 +8,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmseg
known_third_party = PIL,cityscapesscripts,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch
known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY

View File

@ -0,0 +1,86 @@
import argparse
import os.path as osp
from functools import partial
import mmcv
import numpy as np
from detail import Detail
from PIL import Image
# Raw label ids accepted by the converter, stored in ascending order.
_mapping = np.array(
    sorted([
        0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
        158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
        440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
        85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
    ]))
# uint8 lookup table with one consecutive value per entry of ``_mapping``.
_key = np.arange(len(_mapping), dtype='uint8')
def generate_labels(img_id, detail, out_dir):
    """Convert the mask of one image to a label PNG and save it.

    Args:
        img_id: Image record passed to ``detail.getMask``; it is indexed
            with ``'file_name'`` to obtain the source image file name.
        detail: Object providing ``getMask(img_id)`` that returns the raw
            mask array for the image.
        out_dir (str): Directory the converted PNG is written to.

    Returns:
        str: Base name of the image file without its extension.

    Raises:
        AssertionError: If the mask holds an id that is not in ``_mapping``.
    """

    def _class_to_index(mask, _mapping, _key):
        # Every id present in the mask must be a known one, otherwise the
        # digitize-based lookup below would silently pick a wrong label.
        values = np.unique(mask)
        known = np.isin(values, _mapping)
        assert known.all(), (
            'mask contains label ids missing from the mapping: '
            f'{values[~known].tolist()}')
        # ``_mapping`` is sorted, so with right=True each known id maps to
        # its own position in ``_mapping``.
        index = np.digitize(mask.ravel(), _mapping, right=True)
        return _key[index].reshape(mask.shape)

    mask = Image.fromarray(
        _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key))
    filename = img_id['file_name']
    # Swap only the extension; str.replace('jpg', 'png') would also rewrite
    # any other "jpg" substring in the name.
    png_name = osp.splitext(filename)[0] + '.png'
    mask.save(osp.join(out_dir, png_name))
    return osp.splitext(osp.basename(filename))[0]
def parse_args():
    """Parse the command-line arguments of the converter script.

    Returns:
        argparse.Namespace: Holds ``devkit_path`` and ``json_path``
            (positional) and ``out_dir``, which is ``None`` when
            ``-o/--out_dir`` is not given.
    """
    parser = argparse.ArgumentParser(
        description='Convert PASCAL Context annotations to mmsegmentation '
        'format')
    parser.add_argument('devkit_path', help='pascal voc devkit path')
    parser.add_argument('json_path', help='annotation json filepath')
    parser.add_argument('-o', '--out_dir', help='output path')
    return parser.parse_args()
def main():
    """Write the label PNGs and the train/val split lists for VOC2010.

    Label images go to ``--out_dir`` (by default
    ``<devkit>/VOC2010/SegmentationClassContext``), and the sorted image
    names of each split go to ``train.txt`` / ``val.txt`` under
    ``<devkit>/VOC2010/ImageSets/SegmentationContext``.
    """
    args = parse_args()
    devkit_path = args.devkit_path
    json_path = args.json_path
    out_dir = args.out_dir
    if out_dir is None:
        out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
    mmcv.mkdir_or_exist(out_dir)
    img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')

    # Both split readers are created before any conversion starts.
    train_detail = Detail(json_path, img_dir, 'train')
    train_ids = train_detail.getImgs()

    val_detail = Detail(json_path, img_dir, 'val')
    val_ids = val_detail.getImgs()

    split_dir = osp.join(devkit_path,
                         'VOC2010/ImageSets/SegmentationContext')
    mmcv.mkdir_or_exist(split_dir)

    def _convert_split(split_detail, img_ids, list_name):
        # Convert every image of one split, then write its sorted name list.
        names = mmcv.track_progress(
            partial(generate_labels, detail=split_detail, out_dir=out_dir),
            img_ids)
        with open(osp.join(split_dir, list_name), 'w') as f:
            f.writelines(name + '\n' for name in sorted(names))

    _convert_split(train_detail, train_ids, 'train.txt')
    _convert_split(val_detail, val_ids, 'val.txt')

    print('Done!')
if __name__ == '__main__':
main()