[Feature] Support ISPRS Vaihingen Dataset. (#1171)

* Add Vaihingen

* upload models&logs of vaihingen

* fix unit test

* fix dataset pipeline

* fix unit test coverage

* fix vaihingen docstring
This commit is contained in:
MengzhangLI 2022-01-22 20:27:51 +08:00 committed by GitHub
parent 6a22c428c2
commit 4b905cbe2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 609 additions and 30 deletions

View File

@ -0,0 +1,54 @@
# dataset settings
dataset_type = 'ISPRSDataset'
data_root = 'data/vaihingen'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(512, 512),
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
img_dir='img_dir/train',
ann_dir='ann_dir/train',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_root=data_root,
img_dir='img_dir/val',
ann_dir='ann_dir/val',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
data_root=data_root,
img_dir='img_dir/val',
ann_dir='ann_dir/val',
pipeline=test_pipeline))

View File

@ -112,6 +112,14 @@ Spatial pyramid pooling module or encode-decoder structure are used in deep neur
| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.44 | 78.33 | 79.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json) | | DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.44 | 78.33 | 79.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json) |
| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 17.56 | 78.7 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json) | | DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 17.56 | 78.7 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json) |
### Vaihingen
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 72.79 | 72.50 | 74.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805.log.json) |
| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.91 | 73.97 | 75.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) |
| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 18.59 | 73.06 | 74.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) |
Note: Note:
- `FP16` means Mixed Precision (FP16) is adopted in training. - `FP16` means Mixed Precision (FP16) is adopted in training.

View File

@ -9,6 +9,7 @@ Collections:
- Pascal Context 59 - Pascal Context 59
- LoveDA - LoveDA
- Potsdam - Potsdam
- Vaihingen
Paper: Paper:
URL: https://arxiv.org/abs/1802.02611 URL: https://arxiv.org/abs/1802.02611
Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation
@ -736,3 +737,69 @@ Models:
mIoU(ms+flip): 79.47 mIoU(ms+flip): 79.47
Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth
- Name: deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen
In Collection: deeplabv3plus
Metadata:
backbone: R-18-D8
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 13.74
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 1.91
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 72.5
mIoU(ms+flip): 74.13
Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth
- Name: deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen
In Collection: deeplabv3plus
Metadata:
backbone: R-50-D8
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 37.16
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 7.36
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 73.97
mIoU(ms+flip): 75.05
Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth
- Name: deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen
In Collection: deeplabv3plus
Metadata:
backbone: R-101-D8
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 53.79
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 10.83
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 73.06
mIoU(ms+flip): 74.14
Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth

View File

@ -0,0 +1,2 @@
_base_ = './deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))

View File

@ -0,0 +1,11 @@
_base_ = './deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py'
model = dict(
pretrained='open-mmlab://resnet18_v1c',
backbone=dict(depth=18),
decode_head=dict(
c1_in_channels=64,
c1_channels=12,
in_channels=512,
channels=128,
),
auxiliary_head=dict(in_channels=256, channels=64))

View File

@ -0,0 +1,7 @@
_base_ = [
'../_base_/models/deeplabv3plus_r50-d8.py',
'../_base_/datasets/vaihingen.py', '../_base_/default_runtime.py',
'../_base_/schedules/schedule_80k.py'
]
model = dict(
decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6))

View File

@ -100,3 +100,11 @@ High-resolution representations are essential for position-sensitive vision prob
| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 36.00 | 77.64 | 78.8 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json) | | FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 36.00 | 77.64 | 78.8 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json) |
| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.25 | 78.26 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json) | | FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.25 | 78.26 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json) |
| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 16.42 | 78.39 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json) | | FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 16.42 | 78.39 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json) |
### Vaihingen
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 38.11 | 71.81 | 73.1 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909.log.json) |
| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.55 | 72.57 | 74.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216.log.json) |
| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 17.25 | 72.50 | 73.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244.log.json) |

View File

@ -0,0 +1,5 @@
_base_ = [
'../_base_/models/fcn_hr18.py', '../_base_/datasets/vaihingen.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
model = dict(decode_head=dict(num_classes=6))

View File

@ -0,0 +1,9 @@
_base_ = './fcn_hr18_4x4_512x512_80k_vaihingen.py'
model = dict(
pretrained='open-mmlab://msra/hrnetv2_w18_small',
backbone=dict(
extra=dict(
stage1=dict(num_blocks=(2, )),
stage2=dict(num_blocks=(2, 2)),
stage3=dict(num_modules=3, num_blocks=(2, 2, 2)),
stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2)))))

View File

@ -0,0 +1,10 @@
_base_ = './fcn_hr18_4x4_512x512_80k_vaihingen.py'
model = dict(
pretrained='open-mmlab://msra/hrnetv2_w48',
backbone=dict(
extra=dict(
stage2=dict(num_channels=(48, 96)),
stage3=dict(num_channels=(48, 96, 192)),
stage4=dict(num_channels=(48, 96, 192, 384)))),
decode_head=dict(
in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384])))

View File

@ -9,6 +9,7 @@ Collections:
- Pascal Context 59 - Pascal Context 59
- LoveDA - LoveDA
- Potsdam - Potsdam
- Vaihingen
Paper: Paper:
URL: https://arxiv.org/abs/1908.07919 URL: https://arxiv.org/abs/1908.07919
Title: Deep High-Resolution Representation Learning for Human Pose Estimation Title: Deep High-Resolution Representation Learning for Human Pose Estimation
@ -581,3 +582,69 @@ Models:
mIoU(ms+flip): 79.34 mIoU(ms+flip): 79.34
Config: configs/hrnet/fcn_hr48_512x512_80k_potsdam.py Config: configs/hrnet/fcn_hr48_512x512_80k_potsdam.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth
- Name: fcn_hr18s_4x4_512x512_80k_vaihingen
In Collection: hrnet
Metadata:
backbone: HRNetV2p-W18-Small
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 26.24
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 1.58
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 71.81
mIoU(ms+flip): 73.1
Config: configs/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth
- Name: fcn_hr18_4x4_512x512_80k_vaihingen
In Collection: hrnet
Metadata:
backbone: HRNetV2p-W18
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 51.15
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 2.76
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 72.57
mIoU(ms+flip): 74.09
Config: configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth
- Name: fcn_hr48_4x4_512x512_80k_vaihingen
In Collection: hrnet
Metadata:
backbone: HRNetV2p-W48
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 57.97
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 6.2
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 72.5
mIoU(ms+flip): 73.52
Config: configs/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth

View File

@ -141,6 +141,14 @@ We support evaluation results on these two datasets using models above trained o
| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.21 | 78.12 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json) | | PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.21 | 78.12 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json) |
| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.40 | 78.62 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | | PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.40 | 78.62 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) |
### Vaihingen
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 85.06 | 71.46 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) |
| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.29 | 72.36 | 73.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) |
| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.97 | 72.61 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806.log.json) |
Note: Note:
- `FP16` means Mixed Precision (FP16) is adopted in training. - `FP16` means Mixed Precision (FP16) is adopted in training.

View File

@ -12,6 +12,7 @@ Collections:
- COCO-Stuff 164k - COCO-Stuff 164k
- LoveDA - LoveDA
- Potsdam - Potsdam
- Vaihingen
Paper: Paper:
URL: https://arxiv.org/abs/1612.01105 URL: https://arxiv.org/abs/1612.01105
Title: Pyramid Scene Parsing Network Title: Pyramid Scene Parsing Network
@ -875,3 +876,69 @@ Models:
mIoU(ms+flip): 79.47 mIoU(ms+flip): 79.47
Config: configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py Config: configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth
- Name: pspnet_r18-d8_4x4_512x512_80k_vaihingen
In Collection: pspnet
Metadata:
backbone: R-18-D8
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 11.76
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 1.45
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 71.46
mIoU(ms+flip): 73.36
Config: configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth
- Name: pspnet_r50-d8_4x4_512x512_80k_vaihingen
In Collection: pspnet
Metadata:
backbone: R-50-D8
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 33.01
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 6.14
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 72.36
mIoU(ms+flip): 73.75
Config: configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth
- Name: pspnet_r101-d8_4x4_512x512_80k_vaihingen
In Collection: pspnet
Metadata:
backbone: R-101-D8
crop size: (512,512)
lr schd: 80000
inference time (ms/im):
- value: 50.08
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (512,512)
Training Memory (GB): 9.61
Results:
- Task: Semantic Segmentation
Dataset: Vaihingen
Metrics:
mIoU: 72.61
mIoU(ms+flip): 74.18
Config: configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth

View File

@ -0,0 +1,2 @@
_base_ = './pspnet_r50-d8_4x4_512x512_80k_vaihingen.py'
model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))

View File

@ -0,0 +1,9 @@
_base_ = './pspnet_r50-d8_4x4_512x512_80k_vaihingen.py'
model = dict(
pretrained='open-mmlab://resnet18_v1c',
backbone=dict(depth=18),
decode_head=dict(
in_channels=512,
channels=128,
),
auxiliary_head=dict(in_channels=256, channels=64))

View File

@ -0,0 +1,6 @@
_base_ = [
'../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/vaihingen.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
]
model = dict(
decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6))

View File

@ -309,3 +309,19 @@ python tools/convert_datasets/potsdam.py /path/to/potsdam
``` ```
In our default setting, it will generate 3456 images for training and 2016 images for validation. In our default setting, it will generate 3456 images for training and 2016 images for validation.
### ISPRS Vaihingen
The [Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/)
dataset is for urban semantic segmentation used in the 2D Semantic Labeling Contest - Vaihingen.
The dataset can be requested at the challenge [homepage](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/).
The 'ISPRS_semantic_labeling_Vaihingen.zip' and 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip' are required.
For Vaihingen dataset, please run the following command to download and re-organize the dataset.
```shell
python tools/convert_datasets/vaihingen.py /path/to/vaihingen
```
In our default setting (`clip_size` =512, `stride_size`=256), it will generate 344 images for training and 398 images for validation.

View File

@ -250,3 +250,19 @@ python tools/convert_datasets/potsdam.py /path/to/potsdam
``` ```
使用我们默认的配置, 将生成 3456 张图片的训练集和 2016 张图片的验证集。 使用我们默认的配置, 将生成 3456 张图片的训练集和 2016 张图片的验证集。
### ISPRS Vaihingen
[Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/)
数据集是一个有着2D 语义分割内容标注的城市遥感数据集。
数据集可以从挑战 [主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/).
需要其中的 'ISPRS_semantic_labeling_Vaihingen.zip' 和 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip'。
对于 Vaihingen 数据集,请运行以下命令下载并重新组织数据集
```shell
python tools/convert_datasets/vaihingen.py /path/to/vaihingen
```
使用我们默认的配置 (`clip_size` =512, `stride_size`=256) 将生成 344 张图片的训练集和 398 张图片的验证集。

View File

@ -52,22 +52,6 @@ def voc_classes():
] ]
def loveda_classes():
"""LoveDA class names for external use."""
return [
'background', 'building', 'road', 'water', 'barren', 'forest',
'agricultural'
]
def potsdam_classes():
"""Potsdam class names for external use."""
return [
'impervious_surface', 'building', 'low_vegetation', 'tree', 'car',
'clutter'
]
def cocostuff_classes(): def cocostuff_classes():
"""CocoStuff class names for external use.""" """CocoStuff class names for external use."""
return [ return [
@ -103,6 +87,30 @@ def cocostuff_classes():
] ]
def loveda_classes():
"""LoveDA class names for external use."""
return [
'background', 'building', 'road', 'water', 'barren', 'forest',
'agricultural'
]
def potsdam_classes():
"""Potsdam class names for external use."""
return [
'impervious_surface', 'building', 'low_vegetation', 'tree', 'car',
'clutter'
]
def vaihingen_classes():
"""Vaihingen class names for external use."""
return [
'impervious_surface', 'building', 'low_vegetation', 'tree', 'car',
'clutter'
]
def cityscapes_palette(): def cityscapes_palette():
"""Cityscapes palette for external use.""" """Cityscapes palette for external use."""
return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
@ -163,18 +171,6 @@ def voc_palette():
[128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]]
def loveda_palette():
"""LoveDA palette for external use."""
return [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255],
[159, 129, 183], [0, 255, 0], [255, 195, 128]]
def potsdam_palette():
"""Potsdam palette for external use."""
return [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
[255, 255, 0], [255, 0, 0]]
def cocostuff_palette(): def cocostuff_palette():
"""CocoStuff palette for external use.""" """CocoStuff palette for external use."""
return [[0, 192, 64], [0, 192, 64], [0, 64, 96], [128, 192, 192], return [[0, 192, 64], [0, 192, 64], [0, 64, 96], [128, 192, 192],
@ -222,12 +218,31 @@ def cocostuff_palette():
[64, 160, 64], [64, 64, 0]] [64, 160, 64], [64, 64, 0]]
def loveda_palette():
"""LoveDA palette for external use."""
return [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255],
[159, 129, 183], [0, 255, 0], [255, 195, 128]]
def potsdam_palette():
"""Potsdam palette for external use."""
return [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
[255, 255, 0], [255, 0, 0]]
def vaihingen_palette():
"""Vaihingen palette for external use."""
return [[255, 255, 255], [0, 0, 255], [0, 0, 255], [0, 255, 0],
[255, 255, 0], [255, 0, 0]]
dataset_aliases = { dataset_aliases = {
'cityscapes': ['cityscapes'], 'cityscapes': ['cityscapes'],
'ade': ['ade', 'ade20k'], 'ade': ['ade', 'ade20k'],
'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'], 'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'],
'loveda': ['loveda'], 'loveda': ['loveda'],
'potsdam': ['potsdam'], 'potsdam': ['potsdam'],
'vaihingen': ['vaihingen'],
'cocostuff': [ 'cocostuff': [
'cocostuff', 'cocostuff10k', 'cocostuff164k', 'coco-stuff', 'cocostuff', 'cocostuff10k', 'cocostuff164k', 'coco-stuff',
'coco-stuff10k', 'coco-stuff164k', 'coco_stuff', 'coco_stuff10k', 'coco-stuff10k', 'coco-stuff164k', 'coco_stuff', 'coco_stuff10k',

View File

@ -10,6 +10,7 @@ from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset,
RepeatDataset) RepeatDataset)
from .drive import DRIVEDataset from .drive import DRIVEDataset
from .hrf import HRFDataset from .hrf import HRFDataset
from .isprs import ISPRSDataset
from .loveda import LoveDADataset from .loveda import LoveDADataset
from .night_driving import NightDrivingDataset from .night_driving import NightDrivingDataset
from .pascal_context import PascalContextDataset, PascalContextDataset59 from .pascal_context import PascalContextDataset, PascalContextDataset59
@ -24,5 +25,5 @@ __all__ = [
'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset',
'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset', 'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset',
'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset', 'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset',
'PotsdamDataset' 'ISPRSDataset', 'PotsdamDataset'
] ]

25
mmseg/datasets/isprs.py Normal file
View File

@ -0,0 +1,25 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import DATASETS
from .custom import CustomDataset
@DATASETS.register_module()
class ISPRSDataset(CustomDataset):
"""ISPRS dataset.
In segmentation map annotation for LoveDA, 0 is the ignore index.
``reduce_zero_label`` should be set to True. The ``img_suffix`` and
``seg_map_suffix`` are both fixed to '.png'.
"""
CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree',
'car', 'clutter')
PALETTE = [[255, 255, 255], [0, 0, 255], [0, 0, 255], [0, 255, 0],
[255, 255, 0], [255, 0, 0]]
def __init__(self, **kwargs):
super(ISPRSDataset, self).__init__(
img_suffix='.png',
seg_map_suffix='.png',
reduce_zero_label=True,
**kwargs)

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 393 KiB

View File

@ -14,7 +14,7 @@ from PIL import Image
from mmseg.core.evaluation import get_classes, get_palette from mmseg.core.evaluation import get_classes, get_palette
from mmseg.datasets import (DATASETS, ADE20KDataset, CityscapesDataset, from mmseg.datasets import (DATASETS, ADE20KDataset, CityscapesDataset,
COCOStuffDataset, ConcatDataset, CustomDataset, COCOStuffDataset, ConcatDataset, CustomDataset,
LoveDADataset, MultiImageMixDataset, ISPRSDataset, LoveDADataset, MultiImageMixDataset,
PascalVOCDataset, PotsdamDataset, RepeatDataset, PascalVOCDataset, PotsdamDataset, RepeatDataset,
build_dataset) build_dataset)
@ -27,6 +27,7 @@ def test_classes():
ADE20KDataset.CLASSES) == get_classes('ade') == get_classes('ade20k') ADE20KDataset.CLASSES) == get_classes('ade') == get_classes('ade20k')
assert list(LoveDADataset.CLASSES) == get_classes('loveda') assert list(LoveDADataset.CLASSES) == get_classes('loveda')
assert list(PotsdamDataset.CLASSES) == get_classes('potsdam') assert list(PotsdamDataset.CLASSES) == get_classes('potsdam')
assert list(ISPRSDataset.CLASSES) == get_classes('vaihingen')
assert list(COCOStuffDataset.CLASSES) == get_classes('cocostuff') assert list(COCOStuffDataset.CLASSES) == get_classes('cocostuff')
with pytest.raises(ValueError): with pytest.raises(ValueError):
@ -719,6 +720,16 @@ def test_potsdam():
assert len(test_dataset) == 1 assert len(test_dataset) == 1
def test_vaihingen():
test_dataset = ISPRSDataset(
pipeline=[],
img_dir=osp.join(
osp.dirname(__file__), '../data/pseudo_vaihingen_dataset/img_dir'),
ann_dir=osp.join(
osp.dirname(__file__), '../data/pseudo_vaihingen_dataset/ann_dir'))
assert len(test_dataset) == 1
@patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock) @patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock)
@patch('mmseg.datasets.CustomDataset.__getitem__', @patch('mmseg.datasets.CustomDataset.__getitem__',
MagicMock(side_effect=lambda idx: idx)) MagicMock(side_effect=lambda idx: idx))

View File

@ -0,0 +1,155 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import math
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
import numpy as np
def parse_args():
parser = argparse.ArgumentParser(
description='Convert vaihingen dataset to mmsegmentation format')
parser.add_argument('dataset_path', help='vaihingen folder path')
parser.add_argument('--tmp_dir', help='path of the temporary directory')
parser.add_argument('-o', '--out_dir', help='output path')
parser.add_argument(
'--clip_size',
type=int,
help='clipped size of image after preparation',
default=512)
parser.add_argument(
'--stride_size',
type=int,
help='stride of clipping original images',
default=256)
args = parser.parse_args()
return args
def clip_big_image(image_path, clip_save_dir, to_label=False):
# Original image of Vaihingen dataset is very large, thus pre-processing
# of them is adopted. Given fixed clip size and stride size to generate
# clipped image, the intersection of width and height is determined.
# For example, given one 5120 x 5120 original image, the clip size is
# 512 and stride size is 256, thus it would generate 20x20 = 400 images
# whose size are all 512x512.
image = mmcv.imread(image_path)
h, w, c = image.shape
cs = args.clip_size
ss = args.stride_size
num_rows = math.ceil((h - cs) / ss) if math.ceil(
(h - cs) / ss) * ss + cs >= h else math.ceil((h - cs) / ss) + 1
num_cols = math.ceil((w - cs) / ss) if math.ceil(
(w - cs) / ss) * ss + cs >= w else math.ceil((w - cs) / ss) + 1
x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
xmin = x * cs
ymin = y * cs
xmin = xmin.ravel()
ymin = ymin.ravel()
xmin_offset = np.where(xmin + cs > w, w - xmin - cs, np.zeros_like(xmin))
ymin_offset = np.where(ymin + cs > h, h - ymin - cs, np.zeros_like(ymin))
boxes = np.stack([
xmin + xmin_offset, ymin + ymin_offset,
np.minimum(xmin + cs, w),
np.minimum(ymin + cs, h)
],
axis=1)
if to_label:
color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
[255, 255, 0], [0, 255, 0], [0, 255, 255],
[0, 0, 255]])
flatten_v = np.matmul(
image.reshape(-1, c),
np.array([2, 3, 4]).reshape(3, 1))
out = np.zeros_like(flatten_v)
for idx, class_color in enumerate(color_map):
value_idx = np.matmul(class_color,
np.array([2, 3, 4]).reshape(3, 1))
out[flatten_v == value_idx] = idx
image = out.reshape(h, w)
for box in boxes:
start_x, start_y, end_x, end_y = box
clipped_image = image[start_y:end_y,
start_x:end_x] if to_label else image[
start_y:end_y, start_x:end_x, :]
area_idx = osp.basename(image_path).split('_')[3].strip('.tif')
mmcv.imwrite(
clipped_image.astype(np.uint8),
osp.join(clip_save_dir,
f'{area_idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
def main():
splits = {
'train': [
'area1', 'area11', 'area13', 'area15', 'area17', 'area21',
'area23', 'area26', 'area28', 'area3', 'area30', 'area32',
'area34', 'area37', 'area5', 'area7'
],
'val': [
'area6', 'area24', 'area35', 'area16', 'area14', 'area22',
'area10', 'area4', 'area2', 'area20', 'area8', 'area31', 'area33',
'area27', 'area38', 'area12', 'area29'
],
}
dataset_path = args.dataset_path
if args.out_dir is None:
out_dir = osp.join('data', 'vaihingen')
else:
out_dir = args.out_dir
print('Making directories...')
mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
print('Find the data', zipp_list)
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for zipp in zipp_list:
zip_file = zipfile.ZipFile(zipp)
zip_file.extractall(tmp_dir)
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
if 'ISPRS_semantic_labeling_Vaihingen' in zipp:
src_path_list = glob.glob(
os.path.join(os.path.join(tmp_dir, 'top'), '*.tif'))
if 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE' in zipp: # noqa
src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
# delete unused area9 ground truth
for area_ann in src_path_list:
if 'area9' in area_ann:
src_path_list.remove(area_ann)
prog_bar = mmcv.ProgressBar(len(src_path_list))
for i, src_path in enumerate(src_path_list):
area_idx = osp.basename(src_path).split('_')[3].strip('.tif')
data_type = 'train' if area_idx in splits['train'] else 'val'
if 'noBoundary' in src_path:
dst_dir = osp.join(out_dir, 'ann_dir', data_type)
clip_big_image(src_path, dst_dir, to_label=True)
else:
dst_dir = osp.join(out_dir, 'img_dir', data_type)
clip_big_image(src_path, dst_dir, to_label=False)
prog_bar.update()
print('Removing the temporary files...')
print('Done!')
if __name__ == '__main__':
args = parse_args()
main()