Add Pascal Context to mmsegmentation (#133)

* Add Pascal Context to mmsegmentation * Add benchmark result to Pascal Context * fix mmcv version * fix code syntax * fix code syntax again * Update mmseg/models/segmentors/encoder_decoder.py update hint Co-authored-by: Jerry Jiarui XU <xvjiarui0826@gmail.com> * update comment * fix pascal context model path * fix model path mistake again * fix model path mistake again * fix model path mistakes again Co-authored-by: Jerry Jiarui XU <xvjiarui0826@gmail.com>
2025-06-03 22:03:48 +08:00 · 2020-09-22 14:56:13 +08:00 · 2020-09-22 14:56:13 +08:00 · a2738fd9be
commit a2738fd9be
parent 768c3ee650
34 changed files with 393 additions and 5 deletions
--- a/configs/_base_/datasets/pascal_context.py
+++ b/configs/_base_/datasets/pascal_context.py
@ -0,0 +1,60 @@
+# dataset settings
+dataset_type = 'PascalContextDataset'
+data_root = 'data/VOCdevkit/VOC2010/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+
+img_scale = (520, 520)
+crop_size = (480, 480)
+
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=img_scale,
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClassContext',
+        split='ImageSets/SegmentationContext/train.txt',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClassContext',
+        split='ImageSets/SegmentationContext/val.txt',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='JPEGImages',
+        ann_dir='SegmentationClassContext',
+        split='ImageSets/SegmentationContext/val.txt',
+        pipeline=test_pipeline))
--- a/configs/deeplabv3/README.md
+++ b/configs/deeplabv3/README.md
@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series
 | DeepLabV3 | R-101-D8 | 512x512   |   20000 |      9.6 |           9.81 | 78.70 |         79.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) |
 | DeepLabV3 | R-50-D8  | 512x512   |   40000 | -        | -              | 77.68 |         78.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json)     |
 | DeepLabV3 | R-101-D8 | 512x512   |   40000 | -        | -              | 77.92 |         79.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) |
+
+### Pascal Context
+|  Method   | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU  | mIoU(ms+flip) |                                                                                                                                                                                                    download                                                                                                                                                                                                    |
+|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3 | R-101-D8 | 480x480   |   40000 |      9.2 |           7.09 | 46.55 |         47.81 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) |
+| DeepLabV3 | R-101-D8 | 480x480   |   80000 | -        | -              | 46.42 |         47.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) |
--- a/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py
+++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py
+++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py
+++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/deeplabv3_r50-d8.py',
+    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py
+++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/deeplabv3_r50-d8.py',
+    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/deeplabv3plus/README.md
+++ b/configs/deeplabv3plus/README.md
@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series
 | DeepLabV3+ | R-101-D8 | 512x512   |   20000 |       11 |          13.88 | 77.22 |         78.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) |
 | DeepLabV3+ | R-50-D8  | 512x512   |   40000 | -        | -              | 76.81 |         77.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json)     |
 | DeepLabV3+ | R-101-D8 | 512x512   |   40000 | -        | -              | 78.62 |         79.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) |
+
+### Pascal Context
+|   Method   | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU  | mIoU(ms+flip) |                                                                                                                                                                                                                download                                                                                                                                                                                                                |
+|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| DeepLabV3+ | R-101-D8 | 480x480   |   40000 |       - |          9.09 | 47.30 |         48.47 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) |
+| DeepLabV3+ | R-101-D8 | 480x480   |   80000 | -        | -              | 47.23 |         48.26 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) |
--- a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py
+++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/deeplabv3plus_r50-d8.py',
+    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py
+++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/deeplabv3plus_r50-d8.py',
+    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/fcn/README.md
+++ b/configs/fcn/README.md
@ -43,3 +43,9 @@
 | FCN    | R-101-D8 | 512x512   |   20000 |      9.2 |          14.81 | 71.16 |         73.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) |
 | FCN    | R-50-D8  | 512x512   |   40000 | -        | -              | 66.97 |         69.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json)     |
 | FCN    | R-101-D8 | 512x512   |   40000 | -        | -              | 69.91 |         72.38 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) |
+
+### Pascal Context
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU  | mIoU(ms+flip) |                                                                                                                                                                                  download                                                                                                                                                                                  |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| FCN    | R-101-D8 | 480x480   |   40000 |      - |          9.93 | 44.14 |         45.67 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20200911_212515-9b565a6d.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20200911_212515.log.json) |
+| FCN    | R-101-D8 | 480x480   |   80000 | -        | -              | 44.47 |         45.74 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20200915_032644-a3828480.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20200915_032644.log.json) |
--- a/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py
+++ b/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_480x480_40k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py
+++ b/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './fcn_r50-d8_480x480_80k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py
+++ b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py
@ -0,0 +1,7 @@
+_base_ = [
+    '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
+model = dict(decode_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py
+++ b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py
@ -0,0 +1,7 @@
+_base_ = [
+    '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(decode_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/hrnet/README.md
+++ b/configs/hrnet/README.md
@ -44,3 +44,9 @@
 | FCN    | HRNetV2p-W18-Small | 512x512   |   40000 | -        | -              | 66.61 |         70.00 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) |
 | FCN    | HRNetV2p-W18       | 512x512   |   40000 | -        | -              | 72.90 |         75.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json)     |
 | FCN    | HRNetV2p-W48       | 512x512   |   40000 | -        | -              | 76.24 |         78.49 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json)     |
+
+### Pascal Context
+| Method |      Backbone      | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU  | mIoU(ms+flip) |                                                                                                                                                                                download                                                                                                                                                                                |
+|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| FCN    | HRNetV2p-W48       | 480x480   |   40000 | 6.1        | 8.86              | 45.14 |         47.42 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json)     |
+| FCN    | HRNetV2p-W48       | 480x480   |   80000 | -        | -              | 45.84 |         47.84 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json)     |
--- a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py
+++ b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py
@ -0,0 +1,7 @@
+_base_ = [
+    '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
+]
+model = dict(decode_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py
+++ b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py
@ -0,0 +1,7 @@
+_base_ = [
+    '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+model = dict(decode_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py
+++ b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_480x480_40k_pascal_context.py'
+model = dict(
+    pretrained='open-mmlab://msra/hrnetv2_w18_small',
+    backbone=dict(
+        extra=dict(
+            stage1=dict(num_blocks=(2, )),
+            stage2=dict(num_blocks=(2, 2)),
+            stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+            stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
--- a/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py
+++ b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = './fcn_hr18_480x480_80k_pascal_context.py'
+model = dict(
+    pretrained='open-mmlab://msra/hrnetv2_w18_small',
+    backbone=dict(
+        extra=dict(
+            stage1=dict(num_blocks=(2, )),
+            stage2=dict(num_blocks=(2, 2)),
+            stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
+            stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))
--- a/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py
+++ b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py
@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_480x480_40k_pascal_context.py'
+model = dict(
+    pretrained='open-mmlab://msra/hrnetv2_w48',
+    backbone=dict(
+        extra=dict(
+            stage2=dict(num_channels=(48, 96)),
+            stage3=dict(num_channels=(48, 96, 192)),
+            stage4=dict(num_channels=(48, 96, 192, 384)))),
+    decode_head=dict(
+        in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
--- a/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py
+++ b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py
@ -0,0 +1,10 @@
+_base_ = './fcn_hr18_480x480_80k_pascal_context.py'
+model = dict(
+    pretrained='open-mmlab://msra/hrnetv2_w48',
+    backbone=dict(
+        extra=dict(
+            stage2=dict(num_channels=(48, 96)),
+            stage3=dict(num_channels=(48, 96, 192)),
+            stage4=dict(num_channels=(48, 96, 192, 384)))),
+    decode_head=dict(
+        in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))
--- a/configs/pspnet/README.md
+++ b/configs/pspnet/README.md
@ -39,3 +39,9 @@
 | PSPNet | R-101-D8 | 512x512   |   20000 |      9.6 |          15.02 | 78.47 |         79.25 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) |
 | PSPNet | R-50-D8  | 512x512   |   40000 | -        | -              | 77.29 |         78.48 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json)     |
 | PSPNet | R-101-D8 | 512x512   |   40000 | -        | -              | 78.52 |         79.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) |
+
+### Pascal Context
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU  | mIoU(ms+flip) |                                                                                                                                                                                           download                                                                                                                                                                                           |
+|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| PSPNet | R-101-D8 | 480x480   |   40000 |      8.8 |          9.68 | 46.60 |         47.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) |
+| PSPNet | R-101-D8 | 480x480   |   80000 | -        | -              | 46.03 |         47.15 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) |
--- a/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py
+++ b/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_480x480_40k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py
+++ b/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py
@ -0,0 +1,2 @@
+_base_ = './pspnet_r50-d8_480x480_80k_pascal_context.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
--- a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py
+++ b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/pspnet_r50-d8.py',
+    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py
+++ b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py
@ -0,0 +1,9 @@
+_base_ = [
+    '../_base_/models/pspnet_r50-d8.py',
+    '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_80k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60))
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320))
+optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@ -27,6 +27,14 @@ mmsegmentation
 │   │   │   ├── SegmentationClass
 │   │   │   ├── ImageSets
 │   │   │   │   ├── Segmentation
+│   │   ├── VOC2010
+│   │   │   ├── JPEGImages
+│   │   │   ├── SegmentationClassContext
+│   │   │   ├── ImageSets
+│   │   │   │   ├── SegmentationContext
+│   │   │   │   │   ├── train.txt
+│   │   │   │   │   ├── val.txt
+│   │   │   ├── trainval_merged.json
 │   │   ├── VOCaug
 │   │   │   ├── dataset
 │   │   │   │   ├── cls
@ -69,6 +77,17 @@ Please refer to [concat dataset](https://github.com/open-mmlab/mmsegmentation/bl
 The training and validation sets of ADE20K can be downloaded from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
 You may also download the test set from [here](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).

+### Pascal Context
+The training and validation sets of Pascal Context can be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar). You may also download the test set from [here](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) after registration.
+
+To split the training and validation sets from the original dataset, download `trainval_merged.json` from [here](https://codalabuser.blob.core.windows.net/public/trainval_merged.json).
+
+If you would like to use the Pascal Context dataset, please install [Detail](https://github.com/ccvl/detail-api) and then run the following command to convert the annotations into the proper format.
+
+```shell
+python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
+```
+
 ## Inference with pretrained models

 We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
--- a/mmseg/datasets/init.py
+++ b/mmseg/datasets/init.py
@ -3,10 +3,11 @@ from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
 from .cityscapes import CityscapesDataset
 from .custom import CustomDataset
 from .dataset_wrappers import ConcatDataset, RepeatDataset
+from .pascal_context import PascalContextDataset
 from .voc import PascalVOCDataset

 __all__ = [
    'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
-    'PascalVOCDataset', 'ADE20KDataset'
+    'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset'
 ]
--- a/mmseg/datasets/pascal_context.py
+++ b/mmseg/datasets/pascal_context.py
@ -0,0 +1,54 @@
+import os.path as osp
+
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class PascalContextDataset(CustomDataset):
+    """PascalContext dataset.
+
+    In segmentation map annotation for PascalContext, 0 stands for background,
+    which is included in 60 categories. ``reduce_zero_label`` is fixed to
+    False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is
+    fixed to '.png'.
+
+    Args:
+        split (str): Split txt file for PascalContext.
+        **kwargs: Remaining keyword arguments, forwarded unchanged to
+            ``CustomDataset.__init__``.
+
+    Raises:
+        AssertionError: If ``self.img_dir`` does not exist on disk or
+            ``self.split`` is None once the parent initializer has run.
+    """
+
+    # 60 class names; index 0 is 'background'.
+    CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
+               'bus', 'car', 'cat', 'chair', 'cow', 'table', 'dog', 'horse',
+               'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
+               'tvmonitor', 'bag', 'bed', 'bench', 'book', 'building',
+               'cabinet', 'ceiling', 'cloth', 'computer', 'cup', 'door',
+               'fence', 'floor', 'flower', 'food', 'grass', 'ground',
+               'keyboard', 'light', 'mountain', 'mouse', 'curtain', 'platform',
+               'sign', 'plate', 'road', 'rock', 'shelves', 'sidewalk', 'sky',
+               'snow', 'bedclothes', 'track', 'tree', 'truck', 'wall', 'water',
+               'window', 'wood')
+
+    # One 3-value colour per entry of CLASSES (60 in total).
+    # NOTE(review): [140, 140, 140] occurs twice (indices 6 and 48, i.e.
+    # 'bus' and 'shelves'), so those two classes share a colour.
+    PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
+               [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
+               [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
+               [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
+               [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
+               [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
+               [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
+               [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
+               [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
+               [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
+               [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
+               [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
+               [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
+               [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
+               [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]]
+
+    def __init__(self, split, **kwargs):
+        # Suffixes and reduce_zero_label are pinned here; everything else is
+        # left to the caller through **kwargs.
+        super(PascalContextDataset, self).__init__(
+            img_suffix='.jpg',
+            seg_map_suffix='.png',
+            split=split,
+            reduce_zero_label=False,
+            **kwargs)
+        # Fail early when the image directory is missing or no split was set.
+        assert osp.exists(self.img_dir) and self.split is not None
--- a/mmseg/models/segmentors/encoder_decoder.py
+++ b/mmseg/models/segmentors/encoder_decoder.py
@ -167,13 +167,15 @@ class EncoderDecoder(BaseSegmentor):

    # TODO refactor
    def slide_inference(self, img, img_meta, rescale):
-        """Inference by sliding-window with overlap."""
+        """Inference by sliding-window with overlap.
+
+        If h_crop > h_img or w_crop > w_img, the small patch will be used to
+        decode without padding.
+        """

        h_stride, w_stride = self.test_cfg.stride
        h_crop, w_crop = self.test_cfg.crop_size
        batch_size, _, h_img, w_img = img.size()
-        assert h_crop <= h_img and w_crop <= w_img, (
-            'crop size should not greater than image size')
        num_classes = self.num_classes
        h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
        w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1
--- a/setup.cfg
+++ b/setup.cfg
@ -8,6 +8,6 @@ line_length = 79
 multi_line_output = 0
 known_standard_library = setuptools
 known_first_party = mmseg
-known_third_party = PIL,cityscapesscripts,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch
+known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
--- a/tools/convert_datasets/pascal_context.py
+++ b/tools/convert_datasets/pascal_context.py
@ -0,0 +1,86 @@
+import argparse
+import os.path as osp
+from functools import partial
+
+import mmcv
+import numpy as np
+from detail import Detail
+from PIL import Image
+
+# Raw category ids that may appear in a Detail mask (60 values, listed here
+# unsorted). They are sorted in place so they can serve as np.digitize bins.
+_mapping = np.array([
+    0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
+    158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
+    440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
+    85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
+])
+_mapping.sort()
+# Output label for each bin: position i of the sorted ids becomes label i.
+_key = np.arange(len(_mapping), dtype='uint8')
+
+
+def generate_labels(img_id, detail, out_dir):
+    """Convert the Detail mask of one image into a class-index PNG.
+
+    Args:
+        img_id (dict): Image record taken from ``detail.getImgs()``; only
+            its ``'file_name'`` entry is read here.
+        detail: Detail API handle; ``detail.getMask(img_id)`` supplies the
+            mask of raw category ids.
+        out_dir (str): Directory the PNG label map is written to.
+
+    Returns:
+        str: Base name of the image file without its extension.
+
+    Raises:
+        ValueError: If the mask contains an id that is not in ``_mapping``.
+    """
+
+    def _class_to_index(mask, _mapping, _key):
+        # Every id has to be a known category: np.digitize would otherwise
+        # silently put an unknown id into the next larger bin. Raising
+        # (instead of assert) keeps the check alive under ``python -O``.
+        values = np.unique(mask)
+        unknown = values[~np.isin(values, _mapping)]
+        if unknown.size > 0:
+            raise ValueError('unexpected category ids in mask: {}'.format(
+                unknown.tolist()))
+        # _mapping is sorted, so with right=True an id equal to _mapping[i]
+        # falls into bin i, which _key turns into the uint8 label i.
+        index = np.digitize(mask.ravel(), _mapping, right=True)
+        return _key[index].reshape(mask.shape)
+
+    mask = Image.fromarray(
+        _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key))
+    filename = img_id['file_name']
+    # Swap only the extension. str.replace('jpg', 'png') would also rewrite
+    # a 'jpg' inside the name and would leave '.JPG'/'.jpeg' untouched, in
+    # which case the label map would be saved as a lossy JPEG.
+    mask.save(osp.join(out_dir, osp.splitext(filename)[0] + '.png'))
+    return osp.splitext(osp.basename(filename))[0]
+
+
+def parse_args():
+    """Parse the command-line arguments of the conversion script.
+
+    Returns:
+        argparse.Namespace: ``devkit_path`` and ``json_path`` (positional)
+            plus ``out_dir``, which is None when ``-o/--out_dir`` is not
+            given.
+    """
+    parser = argparse.ArgumentParser(
+        description='Convert PASCAL Context annotations to mmsegmentation '
+        'format')
+    parser.add_argument('devkit_path', help='pascal voc devkit path')
+    parser.add_argument('json_path', help='annotation json filepath')
+    parser.add_argument('-o', '--out_dir', help='output path')
+    return parser.parse_args()
+
+
+def main():
+    """Convert the train and val splits and write their split lists.
+
+    Label PNGs go to ``--out_dir`` (default
+    ``<devkit>/VOC2010/SegmentationClassContext``); ``train.txt`` and
+    ``val.txt`` go to ``<devkit>/VOC2010/ImageSets/SegmentationContext``.
+    """
+    args = parse_args()
+    devkit_path = args.devkit_path
+    json_path = args.json_path
+    out_dir = args.out_dir
+    if out_dir is None:
+        out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
+    mmcv.mkdir_or_exist(out_dir)
+    img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')
+
+    # Open both splits before anything is converted, train first.
+    splits = []
+    for split_name in ('train', 'val'):
+        api = Detail(json_path, img_dir, split_name)
+        splits.append((split_name, api, api.getImgs()))
+
+    list_dir = osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')
+    mmcv.mkdir_or_exist(list_dir)
+
+    for split_name, api, img_infos in splits:
+        convert = partial(generate_labels, detail=api, out_dir=out_dir)
+        stems = mmcv.track_progress(convert, img_infos)
+        # One image stem per line, in sorted order.
+        with open(osp.join(list_dir, split_name + '.txt'), 'w') as f:
+            f.write(''.join(stem + '\n' for stem in sorted(stems)))
+
+    print('Done!')
+
+
+if __name__ == '__main__':
+    main()