Add Pascal Context to mmsegmentation (#133)
* Add Pascal Context to mmsegmentation * Add benchmark result to Pascal Context * fix mmcv version * fix code syntax * fix code syntax again * Update mmseg/models/segmentors/encoder_decoder.py update hint Co-authored-by: Jerry Jiarui XU <xvjiarui0826@gmail.com> * update comment * fix pascal context model path * fix model path mistake again * fix model path mistake again * fix model path mistakes again Co-authored-by: Jerry Jiarui XU <xvjiarui0826@gmail.com>pull/145/head
parent
768c3ee650
commit
a2738fd9be
|
@ -0,0 +1,60 @@
|
|||
# dataset settings
# Pascal Context: images and label maps are both read from the VOC2010 folder.
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'
# Keyword arguments unpacked into the `Normalize` step of both pipelines.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# `img_scale` is the base scale given to `Resize` / `MultiScaleFlipAug`;
# training samples are afterwards cropped and padded to `crop_size`.
img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Images are padded with 0 and label maps with 255.
    # NOTE(review): 255 is presumably the ignore index of the loss - confirm.
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # NOTE(review): presumably uncomment to test at several scales -
        # confirm against the MultiScaleFlipAug documentation.
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# `val` and `test` are identical: both read the val.txt split with the test
# pipeline; only `train` uses train.txt and the augmenting pipeline.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
|
|
@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series
|
|||
| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) |
|
||||
| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) |
|
||||
| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) |
|
||||
|
||||
### Pascal Context
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|
||||
|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | 46.55 | 47.81 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) |
|
||||
| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 46.42 | 47.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) |
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 DeepLabV3 40k config; only the backbone depth (101) and
# the matching pretrained checkpoint identifier are overridden.
_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 DeepLabV3 80k config; only the backbone depth (101) and
# the matching pretrained checkpoint identifier are overridden.
_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,9 @@
|
|||
# Composes the DeepLabV3 R-50-D8 model, the Pascal Context dataset settings,
# the default runtime and the 40k schedule.
_base_ = [
    '../_base_/models/deeplabv3_r50-d8.py',
    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py'
]
# Both heads are resized to 60 output classes.
model = dict(
    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -0,0 +1,9 @@
|
|||
# Composes the DeepLabV3 R-50-D8 model, the Pascal Context dataset settings,
# the default runtime and the 80k schedule.
_base_ = [
    '../_base_/models/deeplabv3_r50-d8.py',
    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py'
]
# Both heads are resized to 60 output classes.
model = dict(
    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series
|
|||
| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) |
|
||||
| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) |
|
||||
| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) |
|
||||
|
||||
### Pascal Context
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|
||||
|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | 47.30 | 48.47 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) |
|
||||
| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 47.23 | 48.26 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) |
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 DeepLabV3+ 40k config; only the backbone depth (101) and
# the matching pretrained checkpoint identifier are overridden.
_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 DeepLabV3+ 80k config; only the backbone depth (101) and
# the matching pretrained checkpoint identifier are overridden.
_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,9 @@
|
|||
# Composes the DeepLabV3+ R-50-D8 model, the Pascal Context dataset settings,
# the default runtime and the 40k schedule.
_base_ = [
    '../_base_/models/deeplabv3plus_r50-d8.py',
    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py'
]
# Both heads are resized to 60 output classes.
model = dict(
    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -0,0 +1,9 @@
|
|||
# Composes the DeepLabV3+ R-50-D8 model, the Pascal Context dataset settings,
# the default runtime and the 80k schedule.
_base_ = [
    '../_base_/models/deeplabv3plus_r50-d8.py',
    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py'
]
# Both heads are resized to 60 output classes.
model = dict(
    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -43,3 +43,9 @@
|
|||
| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) |
|
||||
| FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
|
||||
| FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) |
|
||||
|
||||
### Pascal Context
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|
||||
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | 44.14 | 45.67 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20200911_212515-9b565a6d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20200911_212515.log.json) |
|
||||
| FCN | R-101-D8 | 480x480 | 80000 | - | - | 44.47 | 45.74 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20200915_032644-a3828480.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20200915_032644.log.json) |
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 FCN 40k config; only the backbone depth (101) and the
# matching pretrained checkpoint identifier are overridden.
_base_ = './fcn_r50-d8_480x480_40k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 FCN 80k config; only the backbone depth (101) and the
# matching pretrained checkpoint identifier are overridden.
_base_ = './fcn_r50-d8_480x480_80k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,7 @@
|
|||
# Composes the FCN R-50-D8 model, the Pascal Context dataset settings, the
# default runtime and the 40k schedule.
_base_ = [
    '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
]
# Only the decode head's class count is overridden here.
model = dict(decode_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -0,0 +1,7 @@
|
|||
# Composes the FCN R-50-D8 model, the Pascal Context dataset settings, the
# default runtime and the 80k schedule.
_base_ = [
    '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
# Only the decode head's class count is overridden here.
model = dict(decode_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -44,3 +44,9 @@
|
|||
| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) |
|
||||
| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) |
|
||||
| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) |
|
||||
|
||||
### Pascal Context
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|
||||
|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | 45.14 | 47.42 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) |
|
||||
| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 45.84 | 47.84 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) |
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
# Composes the FCN-HRNet18 model, the Pascal Context dataset settings, the
# default runtime and the 40k schedule.
_base_ = [
    '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
]
# The decode head is resized to 60 output classes.
model = dict(decode_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -0,0 +1,7 @@
|
|||
# Composes the FCN-HRNet18 model, the Pascal Context dataset settings, the
# default runtime and the 80k schedule.
_base_ = [
    '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
# The decode head is resized to 60 output classes.
model = dict(decode_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -0,0 +1,9 @@
|
|||
# Inherits the HRNet18 40k config, switches to the `hrnetv2_w18_small`
# checkpoint and overrides the block/module counts of the four stages.
_base_ = './fcn_hr18_480x480_40k_pascal_context.py'
model = dict(
    pretrained='open-mmlab://msra/hrnetv2_w18_small',
    backbone=dict(
        extra=dict(
            stage1=dict(num_blocks=(2, )),
            stage2=dict(num_blocks=(2, 2)),
            stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
            stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
|
|
@ -0,0 +1,9 @@
|
|||
# Inherits the HRNet18 80k config, switches to the `hrnetv2_w18_small`
# checkpoint and overrides the block/module counts of the four stages.
_base_ = './fcn_hr18_480x480_80k_pascal_context.py'
model = dict(
    pretrained='open-mmlab://msra/hrnetv2_w18_small',
    backbone=dict(
        extra=dict(
            stage1=dict(num_blocks=(2, )),
            stage2=dict(num_blocks=(2, 2)),
            stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
            stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
|
|
@ -0,0 +1,10 @@
|
|||
# Inherits the HRNet18 40k config, switches to the `hrnetv2_w48` checkpoint
# and overrides the channel widths of stages 2-4.
_base_ = './fcn_hr18_480x480_40k_pascal_context.py'
model = dict(
    pretrained='open-mmlab://msra/hrnetv2_w48',
    backbone=dict(
        extra=dict(
            stage2=dict(num_channels=(48, 96)),
            stage3=dict(num_channels=(48, 96, 192)),
            stage4=dict(num_channels=(48, 96, 192, 384)))),
    # The head's inputs repeat the stage4 widths; `channels` is their sum
    # (720).
    decode_head=dict(
        in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
|
|
@ -0,0 +1,10 @@
|
|||
# Inherits the HRNet18 80k config, switches to the `hrnetv2_w48` checkpoint
# and overrides the channel widths of stages 2-4.
_base_ = './fcn_hr18_480x480_80k_pascal_context.py'
model = dict(
    pretrained='open-mmlab://msra/hrnetv2_w48',
    backbone=dict(
        extra=dict(
            stage2=dict(num_channels=(48, 96)),
            stage3=dict(num_channels=(48, 96, 192)),
            stage4=dict(num_channels=(48, 96, 192, 384)))),
    # The head's inputs repeat the stage4 widths; `channels` is their sum
    # (720).
    decode_head=dict(
        in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
|
|
@ -39,3 +39,9 @@
|
|||
| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) |
|
||||
| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
|
||||
| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
|
||||
|
||||
### Pascal Context
|
||||
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
|
||||
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | 46.60 | 47.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) |
|
||||
| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 46.03 | 47.15 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) |
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 PSPNet 40k config; only the backbone depth (101) and the
# matching pretrained checkpoint identifier are overridden.
_base_ = './pspnet_r50-d8_480x480_40k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,2 @@
|
|||
# Inherits the R-50 PSPNet 80k config; only the backbone depth (101) and the
# matching pretrained checkpoint identifier are overridden.
_base_ = './pspnet_r50-d8_480x480_80k_pascal_context.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
|
|
@ -0,0 +1,9 @@
|
|||
# Composes the PSPNet R-50-D8 model, the Pascal Context dataset settings, the
# default runtime and the 40k schedule.
_base_ = [
    '../_base_/models/pspnet_r50-d8.py',
    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py'
]
# Both heads are resized to 60 output classes.
model = dict(
    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -0,0 +1,9 @@
|
|||
# Composes the PSPNet R-50-D8 model, the Pascal Context dataset settings, the
# default runtime and the 80k schedule.
_base_ = [
    '../_base_/models/pspnet_r50-d8.py',
    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py'
]
# Both heads are resized to 60 output classes.
model = dict(
    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
# Sliding-window testing with 480x480 crops and a stride of 320.
test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
|
|
@ -27,6 +27,14 @@ mmsegmentation
|
|||
│ │ │ ├── SegmentationClass
|
||||
│ │ │ ├── ImageSets
|
||||
│ │ │ │ ├── Segmentation
|
||||
│ │ ├── VOC2010
|
||||
│ │ │ ├── JPEGImages
|
||||
│ │ │ ├── SegmentationClassContext
|
||||
│ │ │ ├── ImageSets
|
||||
│ │ │ │ ├── SegmentationContext
|
||||
│ │ │ │ │ ├── train.txt
|
||||
│ │ │ │ │ ├── val.txt
|
||||
│ │ │ ├── trainval_merged.json
|
||||
│ │ ├── VOCaug
|
||||
│ │ │ ├── dataset
|
||||
│ │ │ │ ├── cls
|
||||
|
@ -69,6 +77,17 @@ Please refer to [concat dataset](https://github.com/open-mmlab/mmsegmentation/bl
|
|||
The training and validation sets of ADE20K can be downloaded from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
|
||||
The test set can also be downloaded from [here](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip).
|
||||
|
||||
### Pascal Context
|
||||
The training and validation sets of Pascal Context can be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar). You may also download the test set from [here](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) after registration.
|
||||
|
||||
To split the original dataset into training and validation sets, download `trainval_merged.json` from [here](https://codalabuser.blob.core.windows.net/public/trainval_merged.json).
|
||||
|
||||
If you would like to use the Pascal Context dataset, please install [Detail](https://github.com/ccvl/detail-api) and then run the following command to convert the annotations into the proper format.
|
||||
|
||||
```shell
|
||||
python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
|
||||
```
|
||||
|
||||
## Inference with pretrained models
|
||||
|
||||
We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
|
||||
|
|
|
@ -3,10 +3,11 @@ from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
|
|||
from .cityscapes import CityscapesDataset
|
||||
from .custom import CustomDataset
|
||||
from .dataset_wrappers import ConcatDataset, RepeatDataset
|
||||
from .pascal_context import PascalContextDataset
|
||||
from .voc import PascalVOCDataset
|
||||
|
||||
__all__ = [
|
||||
'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
|
||||
'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
|
||||
'PascalVOCDataset', 'ADE20KDataset'
|
||||
'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset'
|
||||
]
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
import os.path as osp
|
||||
|
||||
from .builder import DATASETS
|
||||
from .custom import CustomDataset
|
||||
|
||||
|
||||
@DATASETS.register_module()
class PascalContextDataset(CustomDataset):
    """Pascal Context segmentation dataset.

    Label maps use index 0 for 'background', which counts as one of the 60
    categories, so ``reduce_zero_label`` is always passed as False. Image
    files are looked up with the '.jpg' suffix and label maps with the
    '.png' suffix.

    Args:
        split (str): Split txt file for PascalContext.
        **kwargs: Remaining keyword arguments, forwarded unchanged to
            ``CustomDataset``.
    """

    CLASSES = (
        'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow', 'table', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor', 'bag', 'bed', 'bench', 'book', 'building',
        'cabinet', 'ceiling', 'cloth', 'computer', 'cup', 'door',
        'fence', 'floor', 'flower', 'food', 'grass', 'ground',
        'keyboard', 'light', 'mountain', 'mouse', 'curtain', 'platform',
        'sign', 'plate', 'road', 'rock', 'shelves', 'sidewalk', 'sky',
        'snow', 'bedclothes', 'track', 'tree', 'truck', 'wall', 'water',
        'window', 'wood')

    # One RGB triple per entry of CLASSES, in the same order.
    PALETTE = [
        [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
        [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
        [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
        [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
        [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
        [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
        [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
        [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
        [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
        [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
        [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
        [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
        [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
        [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
        [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
    ]

    def __init__(self, split, **kwargs):
        super().__init__(
            split=split,
            img_suffix='.jpg',
            seg_map_suffix='.png',
            reduce_zero_label=False,
            **kwargs)
        # The image directory must exist and a split file must be configured.
        assert osp.exists(self.img_dir)
        assert self.split is not None
|
|
@ -167,13 +167,15 @@ class EncoderDecoder(BaseSegmentor):
|
|||
|
||||
# TODO refactor
|
||||
def slide_inference(self, img, img_meta, rescale):
|
||||
"""Inference by sliding-window with overlap."""
|
||||
"""Inference by sliding-window with overlap.
|
||||
|
||||
If h_crop > h_img or w_crop > w_img, the small patch will be used to
|
||||
decode without padding.
|
||||
"""
|
||||
|
||||
h_stride, w_stride = self.test_cfg.stride
|
||||
h_crop, w_crop = self.test_cfg.crop_size
|
||||
batch_size, _, h_img, w_img = img.size()
|
||||
assert h_crop <= h_img and w_crop <= w_img, (
|
||||
'crop size should not greater than image size')
|
||||
num_classes = self.num_classes
|
||||
h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
|
||||
w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1
|
||||
|
|
|
@ -8,6 +8,6 @@ line_length = 79
|
|||
multi_line_output = 0
|
||||
known_standard_library = setuptools
|
||||
known_first_party = mmseg
|
||||
known_third_party = PIL,cityscapesscripts,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch
|
||||
known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch
|
||||
no_lines_before = STDLIB,LOCALFOLDER
|
||||
default_section = THIRDPARTY
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
import argparse
|
||||
import os.path as osp
|
||||
from functools import partial
|
||||
|
||||
import mmcv
|
||||
import numpy as np
|
||||
from detail import Detail
|
||||
from PIL import Image
|
||||
|
||||
# The 60 raw label values accepted from the Detail masks. After sorting, the
# position of a value in `_mapping` is the class index written to disk.
_mapping = np.array([
    0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
    158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
    440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
    85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
])
_mapping.sort()
# Output indices 0..len(_mapping)-1, stored as uint8.
_key = np.arange(len(_mapping)).astype('uint8')
|
||||
|
||||
|
||||
def _class_to_index(mask, mapping, key):
    """Translate raw label values in ``mask`` to contiguous class indices.

    Args:
        mask (np.ndarray): Array of raw label values; every value must occur
            in ``mapping``.
        mapping (np.ndarray): Accepted raw label values, sorted ascending.
        key (np.ndarray): Output index for each position of ``mapping``.

    Returns:
        np.ndarray: Array shaped like ``mask`` holding values from ``key``.

    Raises:
        AssertionError: If ``mask`` contains a value missing from
            ``mapping``.
    """
    unknown = np.setdiff1d(mask, mapping)
    assert unknown.size == 0, (
        'mask contains labels outside the mapping: {}'.format(
            unknown.tolist()))
    # With right=True a value equal to mapping[i] is assigned bin i, so the
    # bin number is exactly the position of the value in the sorted mapping.
    index = np.digitize(mask.ravel(), mapping, right=True)
    return key[index].reshape(mask.shape)


def generate_labels(img_id, detail, out_dir):
    """Convert the mask of one image and save it as a PNG label map.

    Args:
        img_id (dict): Image record; its ``'file_name'`` entry names the
            source image.
        detail: Object whose ``getMask(img_id)`` returns the raw label mask.
        out_dir (str): Directory the label map is written to.

    Returns:
        str: The image file name without directory and extension.
    """
    mask = Image.fromarray(
        _class_to_index(detail.getMask(img_id), mapping=_mapping, key=_key))
    filename = img_id['file_name']
    # Swap only the extension: a plain str.replace('jpg', 'png') would also
    # rewrite 'jpg' inside the stem and would leave e.g. '.JPG' untouched.
    stem = osp.splitext(filename)[0]
    mask.save(osp.join(out_dir, stem + '.png'))
    return osp.splitext(osp.basename(filename))[0]
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line arguments of the conversion script.

    Returns:
        argparse.Namespace: ``devkit_path`` and ``json_path`` (required
        positionals) and ``out_dir`` (``None`` unless ``-o``/``--out_dir``
        is given).
    """
    parser = argparse.ArgumentParser(
        description='Convert PASCAL Context annotations to mmsegmentation '
        'format')
    parser.add_argument('devkit_path', help='pascal voc devkit path')
    parser.add_argument('json_path', help='annotation json filepath')
    parser.add_argument('-o', '--out_dir', help='output path')
    return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """Convert the Pascal Context annotations and write the split lists.

    Label maps go to ``--out_dir`` (default:
    ``<devkit>/VOC2010/SegmentationClassContext``); ``train.txt`` and
    ``val.txt`` are always written below
    ``<devkit>/VOC2010/ImageSets/SegmentationContext``.
    """
    args = parse_args()
    devkit_path = args.devkit_path
    out_dir = args.out_dir
    if out_dir is None:
        out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
    json_path = args.json_path
    mmcv.mkdir_or_exist(out_dir)
    img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')

    # Both annotation readers are created before any conversion starts.
    train_detail = Detail(json_path, img_dir, 'train')
    train_ids = train_detail.getImgs()

    val_detail = Detail(json_path, img_dir, 'val')
    val_ids = val_detail.getImgs()

    split_dir = osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')
    mmcv.mkdir_or_exist(split_dir)

    jobs = (
        ('train.txt', train_detail, train_ids),
        ('val.txt', val_detail, val_ids),
    )
    for list_name, detail, img_ids in jobs:
        convert_one = partial(generate_labels, detail=detail, out_dir=out_dir)
        stems = mmcv.track_progress(convert_one, img_ids)
        # One image stem per line, in sorted order.
        with open(osp.join(split_dir, list_name), 'w') as f:
            f.writelines(line + '\n' for line in sorted(stems))

    print('Done!')


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue