Merge branch 'dev-1.x' of github.com:open-mmlab/mmsegmentation into 1.x

This commit is contained in:
MeowZheng 2023-03-03 16:55:55 +08:00
commit c5a4121262
131 changed files with 2470 additions and 1620 deletions

View File

@ -2,25 +2,25 @@
# Inference Speed is tested on NVIDIA V100
hrnet = [
dict(
config='configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py',
config='configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py',
checkpoint='fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth', # noqa
eval='mIoU',
metric=dict(mIoU=33.0),
),
dict(
config='configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py',
config='configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py',
checkpoint='fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth', # noqa
eval='mIoU',
metric=dict(mIoU=76.31),
),
dict(
config='configs/hrnet/fcn_hr48_512x512_160k_ade20k.py',
config='configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py',
checkpoint='fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth',
eval='mIoU',
metric=dict(mIoU=42.02),
),
dict(
config='configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py',
config='configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py',
checkpoint='fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth', # noqa
eval='mIoU',
metric=dict(mIoU=80.65),
@ -28,25 +28,25 @@ hrnet = [
]
pspnet = [
dict(
config='configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py',
config='configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py',
checkpoint='pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth', # noqa
eval='mIoU',
metric=dict(mIoU=78.55),
),
dict(
config='configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py',
config='configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py',
checkpoint='pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth', # noqa
eval='mIoU',
metric=dict(mIoU=79.76),
),
dict(
config='configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py',
config='configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py',
checkpoint='pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth', # noqa
eval='mIoU',
metric=dict(mIoU=44.39),
),
dict(
config='configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py',
config='configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py',
checkpoint='pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth', # noqa
eval='mIoU',
metric=dict(mIoU=42.48),
@ -54,13 +54,13 @@ pspnet = [
]
resnest = [
dict(
config='configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py',
config='configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py', # noqa
checkpoint='pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth', # noqa
eval='mIoU',
metric=dict(mIoU=45.44),
),
dict(
config='configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py',
config='configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes-512x1024.py', # noqa
checkpoint='pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth', # noqa
eval='mIoU',
metric=dict(mIoU=78.57),
@ -68,7 +68,7 @@ resnest = [
]
fastscnn = [
dict(
config='configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py',
config='configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py',
checkpoint='fast_scnn_8x4_160k_lr0.12_cityscapes-0cec9937.pth',
eval='mIoU',
metric=dict(mIoU=70.96),
@ -76,25 +76,25 @@ fastscnn = [
]
deeplabv3plus = [
dict(
config='configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py', # noqa
config='configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py', # noqa
checkpoint='deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth', # noqa
eval='mIoU',
metric=dict(mIoU=80.98),
),
dict(
config='configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py', # noqa
config='configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py', # noqa
checkpoint='deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth', # noqa
eval='mIoU',
metric=dict(mIoU=80.97),
),
dict(
config='configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py', # noqa
config='configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py', # noqa
checkpoint='deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth', # noqa
eval='mIoU',
metric=dict(mIoU=80.09),
),
dict(
config='configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py', # noqa
config='configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py', # noqa
checkpoint='deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth', # noqa
eval='mIoU',
metric=dict(mIoU=79.83),
@ -102,13 +102,13 @@ deeplabv3plus = [
]
vit = [
dict(
config='configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py',
config='configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py', # noqa
checkpoint='upernet_vit-b16_ln_mln_512x512_160k_ade20k-f444c077.pth',
eval='mIoU',
metric=dict(mIoU=47.73),
),
dict(
config='configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py',
config='configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py', # noqa
checkpoint='upernet_deit-s16_ln_mln_512x512_160k_ade20k-c0cd652f.pth',
eval='mIoU',
metric=dict(mIoU=43.52),
@ -116,7 +116,7 @@ vit = [
]
fp16 = [
dict(
config='configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py', # noqa
config='configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py', # noqa
checkpoint='deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth', # noqa
eval='mIoU',
metric=dict(mIoU=80.46),
@ -124,7 +124,7 @@ fp16 = [
]
swin = [
dict(
config='configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py', # noqa
config='configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py', # noqa
checkpoint='upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth', # noqa
eval='mIoU',
metric=dict(mIoU=44.41),

View File

@ -1,19 +1,19 @@
configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py
configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py
configs/hrnet/fcn_hr48_512x512_160k_ade20k.py
configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py
configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py
configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py
configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py
configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py
configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py
configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py
configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py
configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py
configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py
configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py
configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py
configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py
configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py
configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py
configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py
configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py
configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py
configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py
configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py
configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py
configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py
configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py
configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py
configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes-512x1024.py
configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py
configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py
configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py
configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py
configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py
configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py
configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py
configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py

View File

@ -17,8 +17,6 @@
</sup>
</div>
<div>&nbsp;</div>
</div>
<br />
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmsegmentation)](https://pypi.org/project/mmsegmentation/)
[![PyPI](https://img.shields.io/pypi/v/mmsegmentation)](https://pypi.org/project/mmsegmentation)
@ -33,6 +31,22 @@ Documentation: <https://mmsegmentation.readthedocs.io/en/1.x/>
English | [简体中文](README_zh-CN.md)
</div>
<div align="center">
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218352562-cdded397-b0f3-4ca1-b8dd-a60df8dca75b.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://discord.gg/raweFPmdzG" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
</div>
## Introduction
MMSegmentation is an open source semantic segmentation toolbox based on PyTorch.
@ -62,11 +76,11 @@ The 1.x branch works with **PyTorch 1.6+**.
## What's New
v1.0.0rc5 was released on 01/02/2023.
v1.0.0rc6 was released on 03/03/2023.
Please refer to [changelog.md](docs/en/notes/changelog.md) for details and release history.
- Support ISNet (ICCV'2021) in projects ([#2400](https://github.com/open-mmlab/mmsegmentation/pull/2400))
- Support HSSN (CVPR'2022) in projects ([#2444](https://github.com/open-mmlab/mmsegmentation/pull/2444))
- Support MMSegInferencer ([#2413](https://github.com/open-mmlab/mmsegmentation/pull/2413), [#2658](https://github.com/open-mmlab/mmsegmentation/pull/2658))
- Support REFUGE dataset ([#2554](https://github.com/open-mmlab/mmsegmentation/pull/2554))
## Installation
@ -81,13 +95,14 @@ There are also [advanced tutorials](https://mmsegmentation.readthedocs.io/en/dev
A Colab tutorial is also provided. You may preview the notebook [here](demo/MMSegmentation_Tutorial.ipynb) or directly [run](https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/1.x/demo/MMSegmentation_Tutorial.ipynb) on Colab.
To migrate from MMSegmentation 1.x, please refer to [migration](docs/en/migration.md).
To migrate from MMSegmentation 1.x, please refer to [migration](docs/en/migration).
## Benchmark and model zoo
Results and models are available in the [model zoo](docs/en/model_zoo.md).
Supported backbones:
<details open>
<summary>Supported backbones:</summary>
- [x] ResNet (CVPR'2016)
- [x] ResNeXt (CVPR'2017)
@ -103,7 +118,10 @@ Supported backbones:
- [x] [MAE (CVPR'2022)](configs/mae)
- [x] [PoolFormer (CVPR'2022)](configs/poolformer)
Supported methods:
</details>
<details open>
<summary>Supported methods:</summary>
- [x] [FCN (CVPR'2015/TPAMI'2017)](configs/fcn)
- [x] [ERFNet (T-ITS'2017)](configs/erfnet)
@ -142,7 +160,10 @@ Supported methods:
- [x] [MaskFormer (NeurIPS'2021)](configs/maskformer)
- [x] [Mask2Former (CVPR'2022)](configs/mask2former)
Supported datasets:
</details>
<details open>
<summary>Supported datasets:</summary>
- [x] [Cityscapes](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#cityscapes)
- [x] [PASCAL VOC](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#pascal-voc)
@ -161,8 +182,14 @@ Supported datasets:
- [x] [Vaihingen](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#isprs-vaihingen)
- [x] [iSAID](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#isaid)
</details>
Please refer to [FAQ](docs/en/notes/faq.md) for frequently asked questions.
## Projects
[Here](projects/README.md) are some implementations of SOTA models and solutions built on MMSegmentation, which are supported and maintained by community users. These projects demonstrate the best practices based on MMSegmentation for research and product development. We welcome and appreciate all the contributions to OpenMMLab ecosystem.
## Contributing
We appreciate all contributions to improve MMSegmentation. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline.
@ -191,7 +218,7 @@ If you find this project useful in your research, please consider cite:
This project is released under the [Apache 2.0 license](LICENSE).
## Projects in OpenMMLab
## OpenMMLab Family
- [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab foundational library for training deep learning models
- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.

View File

@ -61,7 +61,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
## Changelog
The latest version, v1.0.0rc5, was released on 2023.02.01.
The latest version, v1.0.0rc6, was released on 2023.03.03.
For more details of version updates and release history, please read the [changelog](docs/en/notes/changelog.md).
## Installation
@ -82,7 +82,8 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
Results and models are available in the [model zoo](docs/zh_cn/model_zoo.md).
Supported backbones:
<details open>
<summary>Supported backbones:</summary>
- [x] ResNet (CVPR'2016)
- [x] ResNeXt (CVPR'2017)
@ -98,7 +99,10 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
- [x] [MAE (CVPR'2022)](configs/mae)
- [x] [PoolFormer (CVPR'2022)](configs/poolformer)
Supported methods:
</details>
<details open>
<summary>Supported methods:</summary>
- [x] [FCN (CVPR'2015/TPAMI'2017)](configs/fcn)
- [x] [ERFNet (T-ITS'2017)](configs/erfnet)
@ -137,7 +141,10 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
- [x] [MaskFormer (NeurIPS'2021)](configs/maskformer)
- [x] [Mask2Former (CVPR'2022)](configs/mask2former)
Supported datasets:
</details>
<details open>
<summary>Supported datasets:</summary>
- [x] [Cityscapes](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#cityscapes)
- [x] [PASCAL VOC](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#pascal-voc)
@ -156,15 +163,22 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
- [x] [Vaihingen](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#isprs-vaihingen)
- [x] [iSAID](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#isaid)
</details>
If you encounter any problems, please refer to the [FAQ](docs/zh_cn/notes/faq.md).
## Community Projects
[Here](projects/README.md) are some implementations of SOTA models and solutions built on MMSegmentation, which are supported and maintained by community users. These projects demonstrate the best practices of research and product development based on MMSegmentation.
We welcome and appreciate all contributions to the OpenMMLab ecosystem.
## Contributing
We appreciate all the contributors' efforts to improve MMSegmentation. Please refer to the [contributing guideline](.github/CONTRIBUTING.md) for guidance on contributing to the project.
## Acknowledgement
MMSegmentation is an open source project jointly contributed to by researchers and engineers from different universities and companies. We thank all the contributors who provide algorithm reimplementations and new features, as well as the users who give valuable feedback.  We hope this toolbox and benchmark can offer the community flexible code tools to reproduce existing algorithms and develop their own new models, and thereby keep contributing to the open source community.
MMSegmentation is an open source project jointly contributed to by researchers and engineers from different universities and companies. We thank all the contributors who provide algorithm reimplementations and new features, as well as the users who give valuable feedback. We hope this toolbox and benchmark can offer the community flexible code tools to reproduce existing algorithms and develop their own new models, and thereby keep contributing to the open source community.
## Citation

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -26,7 +26,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -26,7 +26,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -26,7 +26,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -32,7 +32,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -28,7 +28,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -27,7 +27,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -0,0 +1,90 @@
# dataset settings
dataset_type = 'REFUGEDataset'
data_root = 'data/REFUGE'
train_img_scale = (2056, 2124)
val_img_scale = (1634, 1634)
test_img_scale = (1634, 1634)
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=False),
dict(
type='RandomResize',
scale=train_img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=val_img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=False),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=test_img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=False),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training', seg_map_path='annotations/training'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=val_pipeline))
test_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/test', seg_map_path='annotations/test'),
pipeline=test_pipeline))
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator

View File

@ -26,7 +26,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -25,7 +25,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -1,7 +1,10 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa
custom_imports = dict(imports='mmcls.models', allow_failed_imports=False)
# TODO: delete custom_imports after mmcls supports auto import
# please install mmcls>=1.0
# import mmcls.models to trigger register_module in mmcls
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],

View File

@ -1,7 +1,5 @@
_base_ = ['../_base_/default_runtime.py', '../_base_/datasets/cityscapes.py']
custom_imports = dict(imports='mmdet.models', allow_failed_imports=False)
crop_size = (512, 1024)
data_preprocessor = dict(
type='SegDataPreProcessor',

View File

@ -3,7 +3,6 @@ _base_ = [
]
pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa
custom_imports = dict(imports='mmdet.models', allow_failed_imports=False)
crop_size = (640, 640)
data_preprocessor = dict(

View File

@ -39,12 +39,12 @@ The MobileNetV2 architecture is based on an inverted residual structure where th
### Cityscapes
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 61.54 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) |
| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) |
| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) |
| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) |
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 71.19 | 73.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024_20230224_185436.json) |
| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) |
| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) |
| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) |
### ADE20K

View File

@ -17,9 +17,10 @@ Models:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 61.54
mIoU: 71.19
mIoU(ms+flip): 73.34
Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth
- Name: mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024
In Collection: PSPNet
Metadata:

View File

@ -46,17 +46,17 @@ In this paper, we address the problem of semantic segmentation and focus on the
#### HRNet backbone
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 74.30 | 75.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json) |
| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) |
| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) |
| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) |
| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) |
| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) |
| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) |
| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) |
| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) |
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 76.61 | 78.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026.json) |
| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) |
| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) |
| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) |
| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) |
| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) |
| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) |
| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) |
| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) |
#### ResNet backbone

View File

@ -33,10 +33,10 @@ Models:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
mIoU: 74.3
mIoU(ms+flip): 75.95
mIoU: 76.61
mIoU(ms+flip): 78.01
Config: configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth
- Name: ocrnet_hr18_4xb2-40k_cityscapes-512x1024
In Collection: OCRNet
Metadata:

View File

@ -460,12 +460,8 @@
"outputs": [],
"source": [
"from mmengine.runner import Runner\n",
"from mmseg.utils import register_all_modules\n",
"\n",
"# register all modules in mmseg into the registries\n",
"# do not init the default scope here because it will be init in the runner\n",
"register_all_modules(init_default_scope=False)\n",
"runner = Runner.from_cfg(cfg)\n"
"runner = Runner.from_cfg(cfg)"
]
},
{
@ -523,7 +519,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.8.5 ('tensorflow')",
"display_name": "Python 3.10.6 ('pt1.12')",
"language": "python",
"name": "python3"
},
@ -537,7 +533,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.10.6"
},
"pycharm": {
"stem_cell": {
@ -550,7 +546,7 @@
},
"vscode": {
"interpreter": {
"hash": "20d4b83e0c8b3730b580c42434163d64f4b735d580303a8fade7c849d4d29eba"
"hash": "0442e67aee3d9cbb788fa6e86d60c4ffa94ad7f1943c65abfecb99a6f4696c58"
}
}
},

View File

@ -4,7 +4,6 @@ from argparse import ArgumentParser
from mmengine.model import revert_sync_batchnorm
from mmseg.apis import inference_model, init_model, show_result_pyplot
from mmseg.utils import register_all_modules
def main():
@ -24,8 +23,6 @@ def main():
'--title', default='result', help='The image identifier.')
args = parser.parse_args()
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
if args.device == 'cpu':

View File

@ -0,0 +1,45 @@
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmseg.apis import MMSegInferencer
def main():
parser = ArgumentParser()
parser.add_argument('img', help='Image file')
parser.add_argument('model', help='Config file')
parser.add_argument('--checkpoint', default=None, help='Checkpoint file')
parser.add_argument(
'--out-dir', default='', help='Path to save result file')
parser.add_argument(
'--show',
action='store_true',
default=False,
help='Whether to display the drawn image.')
parser.add_argument(
'--dataset-name',
default='cityscapes',
help='Color palette used for segmentation map')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--opacity',
type=float,
default=0.5,
help='Opacity of painted segmentation map. In (0, 1] range.')
args = parser.parse_args()
# build the model from a config file and a checkpoint file
mmseg_inferencer = MMSegInferencer(
args.model,
args.checkpoint,
dataset_name=args.dataset_name,
device=args.device)
# test a single image
mmseg_inferencer(
args.img, show=args.show, out_dir=args.out_dir, opacity=args.opacity)
if __name__ == '__main__':
main()

File diff suppressed because one or more lines are too long

View File

@ -6,7 +6,6 @@ from mmengine.model.utils import revert_sync_batchnorm
from mmseg.apis import inference_model, init_model
from mmseg.apis.inference import show_result_pyplot
from mmseg.utils import register_all_modules
def main():
@ -53,8 +52,6 @@ def main():
assert args.show or args.output_file, \
'At least one output should be enabled.'
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
if args.device == 'cpu':

View File

@ -4,7 +4,7 @@ ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ARG MMCV="2.0.0rc4"
ARG MMSEG="1.0.0rc5"
ARG MMSEG="1.0.0rc6"
ENV PYTHONUNBUFFERED TRUE

View File

@ -1,4 +1,4 @@
# Add New Datasets
# \[WIP\] Add New Datasets
## Customize datasets by reorganizing data

View File

@ -0,0 +1 @@
# Add New Metrics

View File

@ -1,37 +0,0 @@
# Adding New Data Transforms
1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and returns a dict.
```python
from mmseg.datasets import TRANSFORMS
@TRANSFORMS.register_module()
class MyTransform:
def transform(self, results):
results['dummy'] = True
return results
```
2. Import the new class.
```python
from .my_pipeline import MyTransform
```
3. Use it in config files.
```python
crop_size = (512, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='MyTransform'),
dict(type='PackSegInputs'),
]
```

View File

@ -0,0 +1,52 @@
# Adding New Data Transforms
## Customize data transformation
A customized data transformation must inherit from `BaseTransform` and implement the `transform` function.
Here we use a simple flipping transformation as an example:
```python
import random
import mmcv
from mmcv.transforms import BaseTransform, TRANSFORMS
@TRANSFORMS.register_module()
class MyFlip(BaseTransform):
def __init__(self, direction: str):
super().__init__()
self.direction = direction
def transform(self, results: dict) -> dict:
img = results['img']
results['img'] = mmcv.imflip(img, direction=self.direction)
return results
```
Moreover, import the new class.
```python
from .my_pipeline import MyFlip
```
Thus, we can instantiate a `MyFlip` object and use it to process the data dict.
```python
import numpy as np
transform = MyFlip(direction='horizontal')
data_dict = {'img': np.random.rand(224, 224, 3)}
data_dict = transform(data_dict)
processed_img = data_dict['img']
```
Or, we can use the `MyFlip` transformation in the data pipeline of our config file.
```python
pipeline = [
...
dict(type='MyFlip', direction='horizontal'),
...
]
```
Note that if you want to use `MyFlip` in a config, you must ensure the file containing `MyFlip` is imported at runtime, for example as sketched below.
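A minimal sketch, assuming `MyFlip` is implemented in a hypothetical module `mmseg/datasets/transforms/my_pipeline.py` (adjust the dotted path to wherever the file actually lives):
```python
# hypothetical module path; the file containing MyFlip must be importable
custom_imports = dict(
    imports=['mmseg.datasets.transforms.my_pipeline'],
    allow_failed_imports=False)
```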

View File

@ -1,30 +1,83 @@
# Customize Runtime Settings
## Customize optimization settings
## Customize hooks
### Customize optimizer supported by Pytorch
### Step 1: Implement a new hook
We already support to use all the optimizers implemented by PyTorch, and the only modification is to change the `optimizer` field of config files.
For example, if you want to use `ADAM` (note that the performance could drop a lot), the modification could be as the following.
MMEngine provides commonly used [hooks](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/hook.md) for training and testing.
When users have customization requirements, they can follow the examples below.
For example, if some hyper-parameter of the model needs to be changed during training, we can implement a new hook for it:
```python
optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence
from mmengine.hooks import Hook
from mmengine.model import is_model_wrapper
from mmseg.registry import HOOKS
@HOOKS.register_module()
class NewHook(Hook):
"""Docstring for NewHook.
"""
def __init__(self, a: int, b: int) -> None:
self.a = a
self.b = b
def before_train_iter(self,
runner,
batch_idx: int,
data_batch: Optional[Sequence[dict]] = None) -> None:
cur_iter = runner.iter
# acquire this model when it is in a wrapper
if is_model_wrapper(runner.model):
model = runner.model.module
model.hyper_parameter = self.a * cur_iter + self.b
```
To modify the learning rate of the model, the users only need to modify the `lr` in the config of optimizer. The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
### Step 2: Import a new hook
### Customize self-implemented optimizer
The module defined above needs to be imported into the main namespace first so that it gets registered.
Assuming `NewHook` is implemented in `mmseg/engine/hooks/new_hook.py`, there are two ways to import it:
#### 1. Define a new optimizer
A customized optimizer could be defined as following.
Assume you want to add a optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
You need to create a new directory named `mmseg/core/optimizer`.
And then implement the new optimizer in a file, e.g., in `mmseg/core/optimizer/my_optimizer.py`:
- Import it by modifying `mmseg/engine/hooks/__init__.py`.
Modules should be imported in `mmseg/engine/hooks/__init__.py` so that the registry can find and add the new modules.
```python
from .registry import OPTIMIZERS
from .new_hook import NewHook
__all__ = [..., NewHook]
```
- Import it manually with `custom_imports` in the config file.
```python
custom_imports = dict(imports=['mmseg.engine.hooks.new_hook'], allow_failed_imports=False)
```
### Step 3: Modify config file
Users can set and use customized hooks in training and testing by following the method below.
The execution priority of hooks registered at the same stage of the `Runner` is documented [here](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/hook.md#built-in-hooks).
The default priority of a customized hook is `NORMAL`.
```python
custom_hooks = [
dict(type='NewHook', a=a_value, b=b_value, priority='ABOVE_NORMAL')
]
```
## Customize optimizer
### Step 1: Implement a new optimizer
We recommend implementing the customized optimizer in `mmseg/engine/optimizers/my_optimizer.py`. Here is an example of a new optimizer `MyOptimizer` with parameters `a`, `b` and `c`:
```python
from mmseg.registry import OPTIMIZERS
from torch.optim import Optimizer
@ -32,214 +85,84 @@ from torch.optim import Optimizer
class MyOptimizer(Optimizer):
def __init__(self, a, b, c)
```
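The snippet above is cut short by the diff and only shows the constructor signature; a fuller, purely illustrative sketch (the SGD-style update and the `lr` argument below are assumptions, not the project's official example) could look like:
```python
import torch
from torch.optim import Optimizer

from mmseg.registry import OPTIMIZERS


@OPTIMIZERS.register_module()
class MyOptimizer(Optimizer):
    """Illustrative optimizer; a, b and c are made-up hyper-parameters."""

    def __init__(self, params, a, b, c, lr=0.01):
        defaults = dict(lr=lr, a=a, b=b, c=c)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                # plain SGD-style update; a real optimizer would use a, b, c here
                p.add_(p.grad, alpha=-group['lr'])
        return loss
```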
#### 2. Add the optimizer to registry
### Step 2: Import a new optimizer
To find the above module defined above, this module should be imported into the main namespace at first. There are two options to achieve it.
The module defined above needs to be imported into the main namespace first so that it gets registered.
Assuming `MyOptimizer` is implemented in `mmseg/engine/optimizers/my_optimizer.py`, there are two ways to import it:
- Modify `mmseg/core/optimizer/__init__.py` to import it.
The newly defined module should be imported in `mmseg/core/optimizer/__init__.py` so that the registry will
find the new module and add it:
- Import it by modifying `mmseg/engine/optimizers/__init__.py`.
Modules should be imported in `mmseg/engine/optimizers/__init__.py` so that the registry can find and add the new modules.
```python
from .my_optimizer import MyOptimizer
```
- Use `custom_imports` in the config to manually import it
- Import it manually with `custom_imports` in the config file.
```python
custom_imports = dict(imports=['mmseg.core.optimizer.my_optimizer'], allow_failed_imports=False)
custom_imports = dict(imports=['mmseg.engine.optimizers.my_optimizer'], allow_failed_imports=False)
```
The module `mmseg.core.optimizer.my_optimizer` will be imported at the beginning of the program and the class `MyOptimizer` is then automatically registered.
Note that only the package containing the class `MyOptimizer` should be imported.
`mmseg.core.optimizer.my_optimizer.MyOptimizer` **cannot** be imported directly.
### Step 3: Modify config file
Actually users can use a totally different file directory structure using this importing method, as long as the module root can be located in `PYTHONPATH`.
#### 3. Specify the optimizer in the config file
Then you can use `MyOptimizer` in `optimizer` field of config files.
In the configs, the optimizers are defined by the field `optimizer` like the following:
Then the `optimizer` field in the `optim_wrapper` of the config file needs to be modified. If users want to use the customized `MyOptimizer`, it can be set as:
```python
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optim_wrapper = dict(type='OptimWrapper',
optimizer=dict(type='MyOptimizer',
a=a_value, b=b_value, c=c_value),
clip_grad=None)
```
To use your own optimizer, the field can be changed to
## Customize optimizer constructor
### Step 1: Implement a new optimizer constructor
The optimizer constructor is used to create the optimizer and optimizer wrapper for model training. It supports fine-grained settings such as specifying different learning rates and weight decay for different model layers.
Here is an example of a customized optimizer constructor:
```python
optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
```
### Customize optimizer constructor
Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
The users can do those fine-grained parameter tuning through customizing optimizer constructor.
```python
from mmcv.utils import build_from_cfg
from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
from mmseg.utils import get_root_logger
from .my_optimizer import MyOptimizer
@OPTIMIZER_BUILDERS.register_module()
class MyOptimizerConstructor(object):
from mmengine.optim import DefaultOptimWrapperConstructor
from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS
@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
class LearningRateDecayOptimizerConstructor(DefaultOptimWrapperConstructor):
def __init__(self, optim_wrapper_cfg, paramwise_cfg=None):
def __call__(self, model):
return my_optimizer
```
The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/optimizer/default_constructor.py#L11), which could also serve as a template for new optimizer constructor.
The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L19).
It can also be used as the base class of a new optimizer constructor.
### Step 2: Import a new optimizer constructor
The module which is defined above needs to be imported into the main namespace first to ensure being registered.
We assume `MyOptimizerConstructor` is implemented in `mmseg/engine/optimizers/my_optimizer_constructor.py`; there are two ways to import it:
- Import it by modifying `mmseg/engine/optimizers/__init__.py`.
Modules should be imported in `mmseg/engine/optimizers/__init__.py` so that the registry can find and add these new modules.
```python
from .my_optimizer_constructor import MyOptimizerConstructor
```
- Import it manually by `custom_imports` in the config file.
```python
custom_imports = dict(imports=['mmseg.engine.optimizers.my_optimizer_constructor'], allow_failed_imports=False)
```
### Step 3: Modify config file
Then, to use the customized `MyOptimizerConstructor`, modify the `constructor` field in `optim_wrapper` of the config file:
```python
optim_wrapper = dict(type='OptimWrapper',
                     constructor='MyOptimizerConstructor',
                     clip_grad=None)
```
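Constructor-specific options are usually passed alongside it through `paramwise_cfg`. A hedged sketch; the key names below are purely illustrative and depend on what your constructor actually reads:
```python
# Sketch only: a layer-decay style constructor might read keys like these from
# paramwise_cfg; they are not part of any fixed MMSegmentation API.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.05),
    constructor='MyOptimizerConstructor',
    paramwise_cfg=dict(decay_rate=0.9, decay_type='stage_wise', num_layers=12))
```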

View File

@ -15,8 +15,8 @@ Instantiate Cityscapes training dataset:
```python
from mmseg.datasets import CityscapesDataset
from mmseg.utils import register_all_modules
register_all_modules()
from mmengine.registry import init_default_scope
init_default_scope('mmseg')
data_root = 'data/cityscapes/'
data_prefix=dict(img_path='leftImg8bit/train', seg_map_path='gtFine/train')

View File

@ -81,7 +81,7 @@ The arguments of the constructor:
- `process` method processes one batch of data and data_samples.
- `compute_metrics` method computes the metrics from processed results.
#### IoUMetric.process
### IoUMetric.process
Parameters:
@ -92,7 +92,7 @@ Returns:
This method does not return a value, since the processed results are stored in `self.results`, which will be used to compute the metrics when all batches have been processed.
#### IoUMetric.compute_metrics
### IoUMetric.compute_metrics
Parameters:

View File

@ -19,7 +19,7 @@ Component Customization
.. toctree::
:maxdepth: 1
add_modules.md
add_models.md
add_datasets.md
add_transforms.md
add_metrics.md

View File

@ -1,4 +1,4 @@
# Training Tricks
# \[WIP\] Training Tricks
MMSegmentation supports the following training tricks out of the box.

View File

@ -6,7 +6,9 @@ The structure of this guide is as follows:
- [Data Transforms](#data-transforms)
- [Design of Data pipelines](#design-of-data-pipelines)
- [Customization data transformation](#customization-data-transformation)
- [Data loading](#data-loading)
- [Pre-processing](#pre-processing)
- [Formatting](#formatting)
## Design of Data pipelines
@ -125,48 +127,3 @@ The position of random contrast is in second or second to last(mode 0 or 1 below
- add: `inputs`, `data_sample`
- remove: keys specified by `meta_keys` (merged into the metainfo of data_sample), all other keys
## Customization data transformation
The customized data transformation must inherit from `BaseTransform` and implement the `transform` function.
Here we use a simple flipping transformation as an example:
```python
import random
import mmcv
from mmcv.transforms import BaseTransform, TRANSFORMS
@TRANSFORMS.register_module()
class MyFlip(BaseTransform):
def __init__(self, direction: str):
super().__init__()
self.direction = direction
def transform(self, results: dict) -> dict:
img = results['img']
results['img'] = mmcv.imflip(img, direction=self.direction)
return results
```
Thus, we can instantiate a `MyFlip` object and use it to process the data dict.
```python
import numpy as np
transform = MyFlip(direction='horizontal')
data_dict = {'img': np.random.rand(224, 224, 3)}
data_dict = transform(data_dict)
processed_img = data_dict['img']
```
Alternatively, we can use the `MyFlip` transformation in the data pipeline in our config file.
```python
pipeline = [
...
dict(type='MyFlip', direction='horizontal'),
...
]
```
Note that if you want to use `MyFlip` in a config, you must ensure the file containing `MyFlip` is imported at runtime, as shown in the sketch below.
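A straightforward way to guarantee that, assuming `MyFlip` lives in a module such as `path.to.my_pipeline` (a hypothetical path), is the same `custom_imports` mechanism used for other custom modules:
```python
# Sketch only: replace 'path.to.my_pipeline' with the real module defining
# MyFlip so the TRANSFORMS registry can find it when the config is parsed.
custom_imports = dict(imports=['path.to.my_pipeline'], allow_failed_imports=False)
```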

View File

@ -11,11 +11,6 @@ datasets
.. automodule:: mmseg.datasets
:members:
samplers
^^^^^^^^^^
.. automodule:: mmseg.datasets.samplers
:members:
transforms
^^^^^^^^^^^^
.. automodule:: mmseg.datasets.transforms
@ -35,7 +30,7 @@ optimizers
:members:
mmseg.evaluation
--------------
-----------------
metrics
^^^^^^^^^^

39
docs/en/device/npu.md Normal file
View File

@ -0,0 +1,39 @@
# NPU (HUAWEI Ascend)
## Usage
Please refer to the [building documentation of MMCV](https://mmcv.readthedocs.io/en/latest/get_started/build.html#build-mmcv-full-on-ascend-npu-machine) to install MMCV on NPU devices.
Here we use 4 NPUs to train the model with the following command:
```shell
bash tools/dist_train.sh configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py 4
```
Also, you can use only one NPU to train the model with the following command:
```shell
python tools/train.py configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py
```
## Models Results
| Model | mIoU | Config | Download |
| :-----------------: | :---: | :----------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------ |
| [deeplabv3](<>) | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024_20230115_205626.json) |
| [deeplabv3plus](<>) | 79.23 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024_20230116_043450.json) |
| [hrnet](<>) | 78.1 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_hr18_4xb2-40k_cityscapes-512x1024_20230116_215821.json) |
| [fcn](<>) | 74.15 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_r50-d8_4xb2-40k_cityscapes-512x1024_20230111_083014.json) |
| [icnet](<>) | 69.25 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/icnet_r50-d8_4xb2-80k_cityscapes-832x832_20230119_002929.json) |
| [pspnet](<>) | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024_20230114_042721.json) |
| [unet](<>) | 68.86 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024_20230129_224750.json) |
| [upernet](<>) | 77.81 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/upernet_r50_4xb2-40k_cityscapes-512x1024_20230129_014634.json) |
| [apcnet](<>) | 78.02 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024_20230209_212545.json) |
| [bisenetv1](<>) | 76.04 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024_20230201_023946.json) |
| [bisenetv2](<>) | 72.44 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024_20230205_215606.json) |
**Notes:**
- Unless otherwise noted, the results on NPU with AMP are basically the same as those on GPU with FP32.
**All above models are provided by Huawei Ascend group.**

View File

@ -91,10 +91,8 @@ Option (b). If you install mmsegmentation with pip, open you python interpreter
```python
from mmseg.apis import inference_model, init_model, show_result_pyplot
from mmseg.utils import register_all_modules
import mmcv
register_all_modules()
config_file = 'pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'

View File

@ -45,13 +45,17 @@ Welcome to MMSegmentation's documentation!
notes/changelog.md
notes/faq.md
.. toctree::
:caption: Device Support
device/npu.md
.. toctree::
:caption: Switch Language
switch_language.md
Indices and tables
==================

View File

@ -237,7 +237,7 @@ test_pipeline = [
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[

View File

@ -82,7 +82,7 @@ Here is the changes of `mmseg.apis`:
| Function | Changes |
| :-------------------: | :---------------------------------------------- |
| `init_segmentor` | Renamed to `init_model` |
| `inference_segmentor` | Rename to `inference_segmentor` |
| `inference_segmentor` | Rename to `inference_model` |
| `show_result_pyplot` | Implemented based on `SegLocalVisualizer` |
| `train_model` | Removed, use `runner.train` to train. |
| `multi_gpu_test` | Removed, use `runner.test` to test. |

View File

@ -0,0 +1,102 @@
# Model Zoo Statistics
- Number of papers: 47
- ALGORITHM: 36
- BACKBONE: 11
- Number of checkpoints: 612
- \[ALGORITHM\] [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) (16 ckpts)
- \[ALGORITHM\] [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) (12 ckpts)
- \[BACKBONE\] [BEiT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/beit) (2 ckpts)
- \[ALGORITHM\] [BiSeNetV1](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1) (11 ckpts)
- \[ALGORITHM\] [BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2) (4 ckpts)
- \[ALGORITHM\] [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) (16 ckpts)
- \[ALGORITHM\] [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) (2 ckpts)
- \[BACKBONE\] [ConvNeXt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext) (6 ckpts)
- \[ALGORITHM\] [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) (16 ckpts)
- \[ALGORITHM\] [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) (41 ckpts)
- \[ALGORITHM\] [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) (42 ckpts)
- \[ALGORITHM\] [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) (12 ckpts)
- \[ALGORITHM\] [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) (12 ckpts)
- \[ALGORITHM\] [DPT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dpt) (1 ckpts)
- \[ALGORITHM\] [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) (4 ckpts)
- \[ALGORITHM\] [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) (12 ckpts)
- \[ALGORITHM\] [ERFNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/erfnet) (1 ckpts)
- \[ALGORITHM\] [FastFCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn) (12 ckpts)
- \[ALGORITHM\] [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) (1 ckpts)
- \[ALGORITHM\] [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) (41 ckpts)
- \[ALGORITHM\] [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) (16 ckpts)
- \[BACKBONE\] [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) (37 ckpts)
- \[ALGORITHM\] [ICNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet) (12 ckpts)
- \[ALGORITHM\] [ISANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet) (16 ckpts)
- \[ALGORITHM\] [K-Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet) (7 ckpts)
- \[BACKBONE\] [MAE](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mae) (1 ckpts)
- \[ALGORITHM\] [Mask2Former](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mask2former) (13 ckpts)
- \[ALGORITHM\] [MaskFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/maskformer) (4 ckpts)
- \[BACKBONE\] [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) (8 ckpts)
- \[BACKBONE\] [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) (4 ckpts)
- \[ALGORITHM\] [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net) (16 ckpts)
- \[ALGORITHM\] [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) (24 ckpts)
- \[ALGORITHM\] [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) (4 ckpts)
- \[BACKBONE\] [PoolFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/poolformer) (5 ckpts)
- \[ALGORITHM\] [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) (16 ckpts)
- \[ALGORITHM\] [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) (54 ckpts)
- \[BACKBONE\] [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) (8 ckpts)
- \[ALGORITHM\] [SegFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer) (13 ckpts)
- \[ALGORITHM\] [Segmenter](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter) (5 ckpts)
- \[ALGORITHM\] [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn) (4 ckpts)
- \[ALGORITHM\] [SETR](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr) (7 ckpts)
- \[ALGORITHM\] [STDC](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc) (4 ckpts)
- \[BACKBONE\] [Swin Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin) (6 ckpts)
- \[BACKBONE\] [Twins](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins) (12 ckpts)
- \[ALGORITHM\] [UNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet) (25 ckpts)
- \[ALGORITHM\] [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) (16 ckpts)
- \[BACKBONE\] [Vision Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit) (11 ckpts)

View File

@ -1,5 +1,40 @@
# Changelog of v1.x
## v1.0.0rc6(03/03/2023)
### Highlights
- Support MMSegInferencer ([#2413](https://github.com/open-mmlab/mmsegmentation/pull/2413), [#2658](https://github.com/open-mmlab/mmsegmentation/pull/2658))
- Support REFUGE dataset ([#2554](https://github.com/open-mmlab/mmsegmentation/pull/2554))
### Features
- Support auto import modules from registry ([#2481](https://github.com/open-mmlab/mmsegmentation/pull/2481))
- Replace numpy ascontiguousarray with torch contiguous to speed-up ([#2604](https://github.com/open-mmlab/mmsegmentation/pull/2604))
- Add browse_dataset.py tool ([#2649](https://github.com/open-mmlab/mmsegmentation/pull/2649))
### Bug fix
- Rename and Fix bug of projects HieraSeg ([#2565](https://github.com/open-mmlab/mmsegmentation/pull/2565))
- Add out_channels in `CascadeEncoderDecoder` and update OCRNet and MobileNet v2 results ([#2656](https://github.com/open-mmlab/mmsegmentation/pull/2656))
### Documentation
- Add dataflow documentation of Chinese version ([#2652](https://github.com/open-mmlab/mmsegmentation/pull/2652))
- Add customized runtime documentation of English version ([#2533](https://github.com/open-mmlab/mmsegmentation/pull/2533))
- Add documentation for visualizing feature map using wandb backend ([#2557](https://github.com/open-mmlab/mmsegmentation/pull/2557))
- Add documentation for benchmark results on NPU (HUAWEI Ascend) ([#2569](https://github.com/open-mmlab/mmsegmentation/pull/2569), [#2596](https://github.com/open-mmlab/mmsegmentation/pull/2596), [#2610](https://github.com/open-mmlab/mmsegmentation/pull/2610))
- Fix api name error in the migration doc ([#2601](https://github.com/open-mmlab/mmsegmentation/pull/2601))
- Refine projects documentation ([#2586](https://github.com/open-mmlab/mmsegmentation/pull/2586))
- Refine MMSegmentation documentation ([#2668](https://github.com/open-mmlab/mmsegmentation/pull/2668), [#2659](https://github.com/open-mmlab/mmsegmentation/pull/2659))
### New Contributors
- @zccjjj made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2548
- @liuruiqiang made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2554
- @wangjiangben-hw made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2569
- @jinxianwei made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2557
## v1.0.0rc5(02/01/2023)
### Bug fix

View File

@ -1,4 +1,4 @@
# Frequently Asked Questions (FAQ)
# \[WIP\] Frequently Asked Questions (FAQ)
We list some common troubles faced by many users and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. If the contents here do not cover your issue, please create an issue using the [provided templates](https://github.com/open-mmlab/mmsegmentation/blob/master/.github/ISSUE_TEMPLATE/error-report.md/) and make sure you fill in all required information in the template.
@ -8,16 +8,21 @@ The compatible MMSegmentation, MMCV and MMEngine versions are as below. Please i
| MMSegmentation version | MMCV version | MMEngine version | MMClassification (optional) version | MMDetection (optional) version |
| :--------------------: | :----------------------------: | :---------------: | :---------------------------------: | :----------------------------: |
| dev-1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 |
| 1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 |
| 1.0.0rc5 | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 |
| dev-1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0 | mmdet >= 3.0.0rc6 |
| 1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0 | mmdet >= 3.0.0rc6 |
| 1.0.0rc6 | mmcv >= 2.0.0rc4 | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0 | mmdet >= 3.0.0rc6 |
| 1.0.0rc5 | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc6 |
| 1.0.0rc4 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 |
| 1.0.0rc3 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4 \<=3.0.0rc5 |
| 1.0.0rc2 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4 \<=3.0.0rc5 |
| 1.0.0rc1 | mmcv >= 2.0.0rc1, \<=2.0.0rc3> | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | Not required |
| 1.0.0rc0 | mmcv >= 2.0.0rc1, \<=2.0.0rc3> | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | Not required |
Notes: To install MMSegmentation 0.x and master branch, please refer to [the faq 0.x document](https://mmsegmentation.readthedocs.io/en/latest/faq.html#installation) to check compatible versions of MMCV.
Notes:
- MMClassification and MMDetection are optional for MMSegmentation. If you do not install them, `ConvNeXt` (which requires MMClassification) and MaskFormer and Mask2Former (which require MMDetection) cannot be used. We recommend installing them from source. Please refer to [MMClassification](https://github.com/open-mmlab/mmclassification) and [MMDetection](https://github.com/open-mmlab/mmdetection) for more details about their installation.
- To install MMSegmentation 0.x and master branch, please refer to [the faq 0.x document](https://mmsegmentation.readthedocs.io/en/latest/faq.html#installation) to check compatible versions of MMCV.
## How to know the number of GPUs needed to train the model

View File

@ -145,6 +145,15 @@ mmsegmentation
│ │ ├── ann_dir
│ │ │ ├── train
│ │ │ ├── val
│ ├── REFUGE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ │ ├── test
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ │ │ ├── test
```
### Cityscapes
@ -330,7 +339,7 @@ For Potsdam dataset, please run the following command to download and re-organiz
python tools/dataset_converters/potsdam.py /path/to/potsdam
```
In our default setting, it will generate 3,456 images for training and 2,016 images for validation.
In our default setting, it will generate 3456 images for training and 2016 images for validation.
### ISPRS Vaihingen
@ -383,7 +392,7 @@ You may need to follow the following structure for dataset preparation after dow
python tools/dataset_converters/isaid.py /path/to/iSAID
```
In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33,978 images for training and 11,644 images for validation.
In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation.
## LIP(Look Into Person) dataset
@ -436,7 +445,7 @@ cd ./RawData/Training
Then create `train.txt` and `val.txt` to split dataset.
According to TransUNet, the following is the data set division.
According to TransUnet, the following is the data set division.
train.txt
@ -500,7 +509,45 @@ Then, use this command to convert synapse dataset.
python tools/dataset_converters/synapse.py --dataset-path /path/to/synapse
```
In our default setting, it will generate 2,211 2D images for training and 1,568 2D images for validation.
Note that the default evaluation metrics of MMSegmentation (such as the mean Dice value) are calculated on 2D slice images,
which is not comparable to the 3D-scan results reported in some papers such as [TransUNet](https://arxiv.org/abs/2102.04306).
### REFUGE
Register in [REFUGE Challenge](https://refuge.grand-challenge.org) and download [REFUGE dataset](https://refuge.grand-challenge.org/REFUGE2Download).
Then, unzip `REFUGE2.zip` and the contents of original datasets include:
```none
├── REFUGE2
│ ├── REFUGE2
│ │ ├── Annotation-Training400.zip
│ │ ├── REFUGE-Test400.zip
│ │ ├── REFUGE-Test-GT.zip
│ │ ├── REFUGE-Training400.zip
│ │ ├── REFUGE-Validation400.zip
│ │ ├── REFUGE-Validation400-GT.zip
│ ├── __MACOSX
```
Please run the following command to convert REFUGE dataset:
```shell
python tools/convert_datasets/refuge.py --raw_data_root=/path/to/refuge/REFUGE2/REFUGE2
```
The script will make directory structure below:
```none
│ ├── REFUGE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ │ ├── test
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ │ │ ├── test
```
It includes 400 images for training, 400 images for validation and 400 images for testing, which is the same as the REFUGE 2018 dataset.

View File

@ -4,13 +4,132 @@ MMSegmentation provides pre-trained models for semantic segmentation in [Model Z
This note will show how to use existing models to inference on given images.
As for how to test existing models on standard datasets, please see this [guide](./4_train_test.md).
## Inference API
MMSegmentation provides several interfaces for users to easily use pre-trained models for inference.
- [mmseg.apis.init_model](#mmsegapisinit_model)
- [mmseg.apis.inference_model](#mmsegapisinference_model)
- [mmseg.apis.show_result_pyplot](#mmsegapisshow_result_pyplot)
- [Tutorial 3: Inference with existing models](#tutorial-3-inference-with-existing-models)
- [Inferencer](#inferencer)
- [Basic Usage](#basic-usage)
- [Initialization](#initialization)
- [Visualize prediction](#visualize-prediction)
- [List model](#list-model)
- [Inference API](#inference-api)
- [mmseg.apis.init_model](#mmsegapisinit_model)
- [mmseg.apis.inference_model](#mmsegapisinference_model)
- [mmseg.apis.show_result_pyplot](#mmsegapisshow_result_pyplot)
## Inferencer
MMSegmentation provides the most **convenient** way to use a model, `MMSegInferencer`. You can get the segmentation mask of an image with only 3 lines of code.
### Basic Usage
The following example shows how to use `MMSegInferencer` to perform inference on a single image.
```
>>> from mmseg.apis import MMSegInferencer
>>> # Load models into memory
>>> inferencer = MMSegInferencer(model='deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024')
>>> # Inference
>>> inferencer('demo/demo.png', show=True)
```
The visualization result should look like:
<div align="center">
<img src="https://user-images.githubusercontent.com/76149310/221507927-ae01e3a7-016f-4425-b966-7b19cbbe494e.png"/>
</div>
Moreover, you can use `MMSegInferencer` to process a list of images:
```
# Input a list of images
>>> images = [image1, image2, ...] # image1 can be a file path or a np.ndarray
>>> inferencer(images, show=True, wait_time=0.5) # wait_time is delay time, and 0 means forever.
# Or input image directory
>>> images = $IMAGESDIR
>>> inferencer(images, show=True, wait_time=0.5)
# Save visualized rendering color maps and predicted results
# out_dir is the directory to save the output results, img_out_dir and pred_out_dir are subdirectories of out_dir
# to save visualized rendering color maps and predicted results
>>> inferencer(images, out_dir='outputs', img_out_dir='vis', pred_out_dir='pred')
```
There is an optional parameter of the inferencer, `return_datasamples`, whose default value is False; by default the
return value of the inferencer is a `dict` including the 2 keys 'visualization' and 'predictions'.
If `return_datasamples=True`, the inferencer will return a [`SegDataSample`](../advanced_guides/structures.md), or a list of them.
```
result = inferencer('demo/demo.png')
# result is a `dict` including 2 keys 'visualization' and 'predictions'.
# 'visualization' includes color segmentation map
print(result['visualization'].shape)
# (512, 683, 3)
# 'predictions' includes segmentation mask with label indices
print(result['predictions'].shape)
# (512, 683)
result = inferencer('demo/demo.png', return_datasamples=True)
print(type(result))
# <class 'mmseg.structures.seg_data_sample.SegDataSample'>
# Input a list of images
results = inferencer(images)
# The output is list
print(type(results['visualization']), results['visualization'][0].shape)
# <class 'list'> (512, 683, 3)
print(type(results['predictions']), results['predictions'][0].shape)
# <class 'list'> (512, 683)
results = inferencer(images, return_datasamples=True)
# <class 'list'>
print(type(results[0]))
# <class 'mmseg.structures.seg_data_sample.SegDataSample'>
```
### Initialization
`MMSegInferencer` must be initialized from a `model`, which can be a model name, a `Config`, or even the path of a config file.
Model names can be found in the models' metafiles; for example, one model name of MaskFormer is `maskformer_r50-d32_8xb2-160k_ade20k-512x512`. If a model name is given as input, the weights of the model will be downloaded automatically. Below are the other input parameters; a short initialization sketch follows the list.
- weights (str, optional) - Path to the checkpoint. If it is not specified and model is a model name of metafile, the weights will be loaded
from metafile. Defaults to None.
- classes (list, optional) - Input classes for result rendering, as the prediction of a segmentation
model is a segment map with label indices, `classes` is a list which includes
items corresponding to the label indices. If classes is not defined, the visualizer will take `cityscapes` classes by default. Defaults to None.
- palette (list, optional) - Input palette for result rendering, which is a list of colors
corresponding to the classes. If palette is not defined, the visualizer will take the `cityscapes` palette by default. Defaults to None.
- dataset_name (str, optional) - [Dataset name or alias](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/class_names.py#L302-L317),
the visualizer will use the meta information of the dataset, i.e. classes and palette,
but the `classes` and `palette` parameters have higher priority. Defaults to None.
- device (str, optional) - Device to run inference. If None, the available device will be automatically used. Defaults to None.
- scope (str, optional) - The scope of the model. Defaults to 'mmseg'.
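Here is a brief initialization sketch; the config and checkpoint paths are the placeholder ones used later on this page, so adapt them to the files you actually have:
```python
from mmseg.apis import MMSegInferencer

# Sketch only: initialize from an explicit config/checkpoint pair instead of a
# metafile model name, and pin the dataset meta information and device.
inferencer = MMSegInferencer(
    model='configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py',
    weights='checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth',
    dataset_name='cityscapes',
    device='cuda:0')
```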
### Visualize prediction
`MMSegInferencer` supports 4 parameters for visualizing predictions; you can use them when calling the initialized inferencer:
- show (bool) - Whether to display the image in a popup window. Defaults to False.
- wait_time (float) - The interval of show (s). Defaults to 0.
- img_out_dir (str) - Subdirectory of `out_dir`, used to save the rendered color segmentation masks, so `out_dir` must be defined
if you would like to save the predicted masks. Defaults to 'vis'.
- opacity (int, float) - The transparency of segmentation mask. Defaults to 0.8.
Examples of these parameters can be found in [Basic Usage](#basic-usage).
### List model
There is a very easy way to list all model names in MMSegmentation:
```
>>> from mmseg.apis import MMSegInferencer
# models is a list of model names, and they will be printed automatically
>>> models = MMSegInferencer.list_models('mmseg')
```
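The returned names are plain strings, so they can be filtered and fed straight back into the constructor; the 'ade20k' filter below is only an illustrative choice:
```
>>> from mmseg.apis import MMSegInferencer
>>> models = MMSegInferencer.list_models('mmseg')
>>> # Sketch only: pick a checkpoint trained on ADE20K and build an inferencer from it
>>> ade20k_models = [name for name in models if 'ade20k' in name]
>>> inferencer = MMSegInferencer(model=ade20k_models[0])
```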
## Inference API
### mmseg.apis.init_model
@ -31,14 +150,10 @@ Example:
```python
from mmseg.apis import init_model
from mmseg.utils import register_all_modules
config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
# register all modules in mmseg into the registries
register_all_modules()
# initialize model without checkpoint
model = init_model(config_path)
@ -76,14 +191,11 @@ Example:
```python
from mmseg.apis import init_model, inference_model
from mmseg.utils import register_all_modules
config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
img_path = 'demo/demo.png'
# register all modules in mmseg into the registries
register_all_modules()
model = init_model(config_path, checkpoint_path)
result = inference_model(model, img_path)
@ -115,14 +227,11 @@ Example:
```python
from mmseg.apis import init_model, inference_model, show_result_pyplot
from mmseg.utils import register_all_modules
config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
img_path = 'demo/demo.png'
# register all modules in mmseg into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(config_path, checkpoint_path, device='cuda:0')

View File

@ -1,4 +1,4 @@
# Deployment
# \[WIP\] Deployment
> ## [Try the new MMDeploy to deploy your model](https://mmdeploy.readthedocs.io/)

View File

@ -1,4 +1,4 @@
# Useful Tools
# \[WIP\] Useful Tools
Apart from training/testing scripts, we provide lots of useful tools under the
`tools/` directory.

View File

@ -1,4 +1,4 @@
# 自定义数据集(待更新)
# 新增自定义数据集(待更新)
## 通过重新组织数据来定制数据集

View File

@ -1 +0,0 @@
# 添加评测指标

View File

@ -0,0 +1 @@
# 新增评测指标 (待更新)

View File

@ -0,0 +1,3 @@
# 新增模块(待更新)
中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/add_models.md)

View File

@ -1,230 +0,0 @@
# 自定义模型(待更新)
## 自定义优化器 (optimizer)
假设您想增加一个新的叫 `MyOptimizer` 的优化器,它的参数分别为 `a`, `b`, 和 `c`
您首先需要在一个文件里实现这个新的优化器,例如在 `mmseg/core/optimizer/my_optimizer.py` 里面:
```python
from mmcv.runner import OPTIMIZERS
from torch.optim import Optimizer
@OPTIMIZERS.register_module
class MyOptimizer(Optimizer):
def __init__(self, a, b, c)
```
然后增加这个模块到 `mmseg/core/optimizer/__init__.py` 里面,这样注册器 (registry) 将会发现这个新的模块并添加它:
```python
from .my_optimizer import MyOptimizer
```
之后您可以在配置文件的 `optimizer` 域里使用 `MyOptimizer`
如下所示,在配置文件里,优化器被 `optimizer` 域所定义:
```python
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
```
为了使用您自己的优化器,域可以被修改为:
```python
optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
```
我们已经支持了 PyTorch 自带的全部优化器,唯一修改的地方是在配置文件里的 `optimizer` 域。例如,如果您想使用 `ADAM`,尽管数值表现会掉点,还是可以如下修改:
```python
optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
```
使用者可以直接按照 PyTorch [文档教程](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) 去设置参数。
## 定制优化器的构造器 (optimizer constructor)
对于优化,一些模型可能会有一些特别定义的参数,例如批归一化 (BatchNorm) 层里面的权重衰减 (weight decay)。
使用者可以通过定制优化器的构造器来微调这些细粒度的优化器参数。
```python
from mmcv.utils import build_from_cfg
from mmcv.runner import OPTIMIZER_BUILDERS
from .cocktail_optimizer import CocktailOptimizer
@OPTIMIZER_BUILDERS.register_module
class CocktailOptimizerConstructor(object):
def __init__(self, optim_wrapper_cfg, paramwise_cfg=None):
def __call__(self, model):
return my_optimizer
```
## 开发和增加新的组件Module
MMSegmentation 里主要有2种组件
- 主干网络 (backbone): 通常是卷积网络的堆叠,来做特征提取,例如 ResNet, HRNet
- 解码头 (decoder head): 用于语义分割图的解码的组件(得到分割结果)
### 添加新的主干网络
这里我们以 MobileNet 为例,展示如何增加新的主干组件:
1. 创建一个新的文件 `mmseg/models/backbones/mobilenet.py`
```python
import torch.nn as nn
from ..registry import BACKBONES
@BACKBONES.register_module
class MobileNet(nn.Module):
def __init__(self, arg1, arg2):
pass
def forward(self, x): # should return a tuple
pass
def init_weights(self, pretrained=None):
pass
```
2. 在 `mmseg/models/backbones/__init__.py` 里面导入模块
```python
from .mobilenet import MobileNet
```
3. 在您的配置文件里使用它
```python
model = dict(
...
backbone=dict(
type='MobileNet',
arg1=xxx,
arg2=xxx),
...
```
### 增加新的解码头 (decoder head)组件
在 MMSegmentation 里面,对于所有的分割头,我们提供一个基类解码头 [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/decode_head.py) 。
所有新建的解码头都应该继承它。这里我们以 [PSPNet](https://arxiv.org/abs/1612.01105) 为例,
展示如何开发和增加一个新的解码头组件:
首先,在 `mmseg/models/decode_heads/psp_head.py` 里添加一个新的解码头。
PSPNet 中实现了一个语义分割的解码头。为了实现一个解码头我们只需要在新构造的解码头中实现如下的3个函数
```python
@HEADS.register_module()
class PSPHead(BaseDecodeHead):
def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
super(PSPHead, self).__init__(**kwargs)
def init_weights(self):
def forward(self, inputs):
```
接着,使用者需要在 `mmseg/models/decode_heads/__init__.py` 里面添加这个模块,这样对应的注册器 (registry) 可以查找并加载它们。
PSPNet的配置文件如下所示
```python
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='EncoderDecoder',
pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='PSPHead',
in_channels=2048,
in_index=3,
channels=512,
pool_scales=(1, 2, 3, 6),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))
```
### 增加新的损失函数
假设您想添加一个新的损失函数 `MyLoss` 到语义分割解码器里。
为了添加一个新的损失函数,使用者需要在 `mmseg/models/losses/my_loss.py` 里面去实现它。
`weighted_loss` 可以对计算损失时的每个样本做加权。
```python
import torch
import torch.nn as nn
from ..builder import LOSSES
from .utils import weighted_loss
@weighted_loss
def my_loss(pred, target):
assert pred.size() == target.size() and target.numel() > 0
loss = torch.abs(pred - target)
return loss
@LOSSES.register_module
class MyLoss(nn.Module):
def __init__(self, reduction='mean', loss_weight=1.0):
super(MyLoss, self).__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss = self.loss_weight * my_loss(
pred, target, weight, reduction=reduction, avg_factor=avg_factor)
return loss
```
然后使用者需要在 `mmseg/models/losses/__init__.py` 里面添加它:
```python
from .my_loss import MyLoss, my_loss
```
为了使用它,修改 `loss_xxx` 域。之后您需要在解码头组件里修改 `loss_decode` 域。
`loss_weight` 可以被用来对不同的损失函数做加权。
```python
loss_decode=dict(type='MyLoss', loss_weight=1.0))
```

View File

@ -1,166 +1,3 @@
# 自定义数据流程(待更新)
# 新增数据增强(待更新)
## 数据流程的设计
按照通常的惯例,我们使用 `Dataset``DataLoader` 做多线程的数据加载。`Dataset` 返回一个数据内容的字典,里面对应于模型前传方法的各个参数。
因为在语义分割中,输入的图像数据具有不同的大小,我们在 MMCV 里引入一个新的 `DataContainer` 类别去帮助收集和分发不同大小的输入数据。
更多细节,请查看[这里](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) 。
数据的准备流程和数据集是解耦的。通常一个数据集定义了如何处理标注数据annotations信息而一个数据流程定义了准备一个数据字典的所有步骤。一个流程包括了一系列操作每个操作里都把一个字典作为输入然后再输出一个新的字典给下一个变换操作。
这些操作可分为数据加载 (data loading),预处理 (pre-processing),格式变化 (formatting) 和测试时数据增强 (test-time augmentation)。
下面的例子就是 PSPNet 的一个流程:
```python
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 1024),
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
```
对于每个操作,我们列出它添加、更新、移除的相关字典域 (dict fields)
### 数据加载 Data loading
`LoadImageFromFile`
- 增加: img, img_shape, ori_shape
`LoadAnnotations`
- 增加: gt_semantic_seg, seg_fields
### 预处理 Pre-processing
`Resize`
- 增加: scale, scale_idx, pad_shape, scale_factor, keep_ratio
- 更新: img, img_shape, \*seg_fields
`RandomFlip`
- 增加: flip
- 更新: img, \*seg_fields
`Pad`
- 增加: pad_fixed_size, pad_size_divisor
- 更新: img, pad_shape, \*seg_fields
`RandomCrop`
- 更新: img, pad_shape, \*seg_fields
`Normalize`
- 增加: img_norm_cfg
- 更新: img
`SegRescale`
- 更新: gt_semantic_seg
`PhotoMetricDistortion`
- 更新: img
### 格式 Formatting
`ToTensor`
- 更新: 由 `keys` 指定
`ImageToTensor`
- 更新: 由 `keys` 指定
`Transpose`
- 更新: 由 `keys` 指定
`ToDataContainer`
- 更新: 由 `keys` 指定
`DefaultFormatBundle`
- 更新: img, gt_semantic_seg
`Collect`
- 增加: img_meta (the keys of img_meta is specified by `meta_keys`)
- 移除: all other keys except for those specified by `keys`
### 测试时数据增强 Test time augmentation
`MultiScaleFlipAug`
## 拓展和使用自定义的流程
1. 在任何一个文件里写一个新的流程,例如 `my_pipeline.py`,它以一个字典作为输入并且输出一个字典
```python
from mmseg.datasets import PIPELINES
@PIPELINES.register_module()
class MyTransform:
def __call__(self, results):
results['dummy'] = True
return results
```
2. 导入一个新类
```python
from .my_pipeline import MyTransform
```
3. 在配置文件里使用它
```python
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
dict(type='MyTransform'),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
```
中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/add_transform.md)

View File

@ -1 +1,90 @@
# 数据流
在本章节中,我们将介绍 [Runner](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/runner.html) 管理的内部模块之间的数据流和数据格式约定。
## 数据流概述
[Runner](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/design/runner.md) 相当于 MMEngine 中的“集成器”。它覆盖了框架的所有方面,并肩负着组织和调度几乎所有模块的责任,这意味着各模块之间的数据流也由 `Runner` 控制。 如 [MMEngine 中的 Runner 文档](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/runner.html)所示,下图展示了基本的数据流。
![Basic dataflow](https://user-images.githubusercontent.com/112053249/199228350-5f80699e-7fd2-4b4c-ac32-0b16b1922c2e.png)
虚线边框、灰色填充形状代表不同的数据格式,而实心框表示模块/方法。由于 MMEngine 极大的灵活性和可扩展性,一些重要的基类可以被继承,并且它们的方法可以被覆写。 上图所示数据流仅适用于当用户没有自定义 `Runner` 中的 `TrainLoop``ValLoop``TestLoop`,并且没有在其自定义模型中覆写 `train_step``val_step``test_step` 方法时。MMSegmentation 中 loop 的默认设置如下:使用`IterBasedTrainLoop` 训练模型,共计 20000 次迭代,并且在每 2000 次迭代后进行一次验证。
```python
train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
```
在上图中,红色线表示 [train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step) ***[中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#train_step)*** 在每次训练迭代中数据加载器dataloader从存储中加载图像并传输到数据预处理器data preprocessor数据预处理器会将图像放到特定的设备上并将数据堆叠到批处理中之后模型接受批处理数据作为输入最后将模型的输出发送给优化器optimizer。蓝色线表示 [val_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#val_step) 和 [test_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#test_step) ***[中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#test_step)*** 。这两个过程的数据流除了模型输出与 `train_step` 不同外,其余均和 `train_step` 类似。由于在评估时模型参数会被冻结,因此模型的输出将被传递给 [Evaluator](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/evaluation.md#ioumetric) ***[中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/evaluation.md#ioumetric)***
来计算指标。
## MMSegmentation 中的数据流约定
在上面的图中,我们可以看到基本的数据流。在本节中,我们将分别介绍数据流中涉及的数据的格式约定。
### 数据加载器到数据预处理器
数据加载器DataLoader是 MMEngine 的训练和测试流程中的一个重要组件。
从概念上讲,它源于 [PyTorch](https://pytorch.org/) 并保持一致。DataLoader 从文件系统加载数据,原始数据通过数据准备流程后被发送给数据预处理器。
MMSegmentation 在 [PackSegInputs](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/transforms/formatting.py#L12) 中定义了默认数据格式, 它是 `train_pipeline``test_pipeline` 的最后一个组件。有关数据转换 `pipeline` 的更多信息,请参阅[数据转换文档](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/transforms.html)。 ***[中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/transforms.html)***
在没有任何修改的情况下PackSegInputs 的返回值通常是一个包含 `inputs``data_samples``dict`。以下伪代码展示了 mmseg 中数据加载器输出的数据类型,它是从数据集中获取的一批数据样本,数据加载器将它们打包成一个字典列表。`inputs` 是输入进模型的张量列表,`data_samples` 包含了输入图像的 meta information 和相应的 ground truth。
```python
dict(
inputs=List[torch.Tensor],
data_samples=List[SegDataSample]
)
```
**注意:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) 是 MMSegmentation 的数据结构接口,用于连接不同组件。`SegDataSample` 实现了抽象数据元素 `mmengine.structures.BaseDataElement`,更多信息请在 [MMEngine](https://github.com/open-mmlab/mmengine) 中参阅 [SegDataSample 文档](https://mmsegmentation.readthedocs.io/zh_CN/1.x/advanced_guides/structures.html)和[数据元素文档](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/data_element.html)。
### 数据预处理器到模型
虽然在[上面的图](##数据流概述)中分开绘制了数据预处理器和模型,但数据预处理器是模型的一部分,因此可以在[模型教程](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/models.html)中找到数据预处理器章节。 ***[中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/models.html)***
数据预处理器的返回值是一个包含 `inputs``data_samples` 的字典,其中 `inputs` 是批处理图像的 4D 张量,`data_samples` 中添加了一些用于数据预处理的额外元信息。当传递给网络时,字典将被解包为两个值。 以下伪代码展示了数据预处理器的返回值和模型的输入值。
```python
dict(
inputs=torch.Tensor,
data_samples=List[SegDataSample]
)
```
```python
class Network(BaseSegmentor):
def forward(self, inputs: torch.Tensor, data_samples: List[SegDataSample], mode: str):
pass
```
**注意:** 模型的前向传播有 3 种模式,由输入参数 mode 控制,更多信息请参阅[模型教程](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md)。 ***[中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md)***
### 模型输出
如[模型教程](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#forward) ***[中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#forward)*** 所提到的 3 种前向传播具有 3 种输出。
`train_step``test_step`(或 `val_step`)分别对应于 `'loss'``'predict'`
`test_step``val_step` 中,推理结果会被传递给 `Evaluator` 。您可以参阅[评估文档](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/evaluation.html) ***[中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/evaluation.html)*** 来获取更多关于 `Evaluator` 的信息。
在推理后MMSegmentation 中的 [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L15) 会对推理结果进行简单的后处理以打包推理结果。神经网络生成的分割 logits经过 `argmax` 操作后的分割 mask 和 ground truth如果存在将被打包到类似 `SegDataSample` 的实例。 [postprocess_result](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L132) 的返回值是一个 **`SegDataSample``List`**。下图显示了这些 `SegDataSample` 实例的关键属性。
![SegDataSample](https://user-images.githubusercontent.com/15952744/209912225-ab46a8d9-904a-43cb-8bf1-8bec4938ed29.png)
与数据预处理器一致,损失函数也是模型的一部分,它是[解码头](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L142)的属性之一。
在 MMSegmentation 中,`decode_head` 的 [loss_by_feat](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L291) 方法是用于计算损失的统一接口。
参数:
- seg_logits (Tensor):解码头前向函数的输出
- batch_data_samples (List\[SegDataSample\]):分割数据样本,通常包括如 `metainfo``gt_sem_seg` 等信息
返回值:
- dict\[str, Tensor\]:一个损失组件的字典
**注意:** `train_step` 将损失传递进 OptimWrapper 以更新模型中的权重,更多信息请参阅 [train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step)。 ***[中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#train_step)***

View File

@ -9,9 +9,10 @@
实例化 Cityscapes 训练数据集:
```python
from mmengine.registry import init_default_scope
from mmseg.datasets import CityscapesDataset
from mmseg.utils import register_all_modules
register_all_modules()
init_default_scope('mmseg')
data_root = 'data/cityscapes/'
data_prefix=dict(img_path='leftImg8bit/train', seg_map_path='gtFine/train')

View File

@ -1 +1,3 @@
# 模型评测
# 模型评测
中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/evaluation.md)

View File

@ -19,7 +19,7 @@
.. toctree::
:maxdepth: 1
add_modules.md
add_models.md
add_datasets.md
add_transforms.md
add_metrics.md

View File

@ -1 +1,3 @@
# 模型
中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/models.md)

View File

@ -1 +1,3 @@
# 数据增广
# 数据增强变化
中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/transforms.md)

View File

@ -11,11 +11,6 @@ datasets
.. automodule:: mmseg.datasets
:members:
samplers
^^^^^^^^^^
.. automodule:: mmseg.datasets.samplers
:members:
transforms
^^^^^^^^^^^^
.. automodule:: mmseg.datasets.transforms

39
docs/zh_cn/device/npu.md Normal file
View File

@ -0,0 +1,39 @@
# NPU (华为 昇腾)
## 使用方法
请参考 [MMCV 的安装文档](https://mmcv.readthedocs.io/en/latest/get_started/build.html#build-mmcv-full-on-ascend-npu-machine) 来安装 NPU 版本的 MMCV。
以下展示单机四卡场景的运行指令:
```shell
bash tools/dist_train.sh configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py 4
```
以下展示单机单卡下的运行指令:
```shell
python tools/train.py configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py
```
## 模型验证结果
| Model | mIoU | Config | Download |
| :-----------------: | :---: | :----------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------ |
| [deeplabv3](<>) | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024_20230115_205626.json) |
| [deeplabv3plus](<>) | 79.23 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024_20230116_043450.json) |
| [hrnet](<>) | 78.1 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_hr18_4xb2-40k_cityscapes-512x1024_20230116_215821.json) |
| [fcn](<>) | 74.15 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_r50-d8_4xb2-40k_cityscapes-512x1024_20230111_083014.json) |
| [icnet](<>) | 69.25 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/icnet_r50-d8_4xb2-80k_cityscapes-832x832_20230119_002929.json) |
| [pspnet](<>) | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024_20230114_042721.json) |
| [unet](<>) | 68.86 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024_20230129_224750.json) |
| [upernet](<>) | 77.81 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/upernet_r50_4xb2-40k_cityscapes-512x1024_20230129_014634.json) |
| [apcnet](<>) | 78.02 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024_20230209_212545.json) |
| [bisenetv1](<>) | 76.04 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024_20230201_023946.json) |
| [bisenetv2](<>) | 72.44 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024_20230205_215606.json) |
**注意:**
- 如果没有特别标记NPU 上的使用混合精度训练的结果与使用 FP32 的 GPU 上的结果相同。
**以上模型结果由华为昇腾团队提供**

View File

@ -34,7 +34,7 @@ conda install pytorch torchvision cpuonly -c pytorch
## 安装
我们建议用户遵循我们的最佳实践来安装 MMSegmentation 。但是整个过程是高度自定义的。更多信息请参见[自定义安装](#自定义安装)部分。
我们建议用户遵循我们的最佳实践来安装 MMSegmentation 。但是整个过程是高度自定义的。更多信息请参见[自定义安装](##自定义安装)部分。
### 最佳实践
@ -92,10 +92,8 @@ python demo/image_demo.py demo/demo.png configs/pspnet/pspnet_r50-d8_4xb2-40k_ci
```python
from mmseg.apis import inference_model, init_model, show_result_pyplot
from mmseg.utils import register_all_modules
import mmcv
register_all_modules()
config_file = 'pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
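# For reference, a minimal sketch of how this snippet typically continues
# (the demo image path below is illustrative, not part of the original doc):
model = init_model(config_file, checkpoint_file, device='cuda:0')
result = inference_model(model, 'demo/demo.png')
# Overlay the predicted mask on the input image; show=True opens a window.
vis_image = show_result_pyplot(model, 'demo/demo.png', result, show=True)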

View File

@ -1 +1,3 @@
# 迁移文档
中文迁移文档在支持中,请先阅读[英文版迁移文档](../en/migration/)

View File

@ -0,0 +1,102 @@
# 模型库统计数据
- 论文数量: 47
- ALGORITHM: 36
- BACKBONE: 11
- 模型数量: 612
- \[ALGORITHM\] [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) (16 ckpts)
- \[ALGORITHM\] [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) (12 ckpts)
- \[BACKBONE\] [BEiT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/beit) (2 ckpts)
- \[ALGORITHM\] [BiSeNetV1](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1) (11 ckpts)
- \[ALGORITHM\] [BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2) (4 ckpts)
- \[ALGORITHM\] [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) (16 ckpts)
- \[ALGORITHM\] [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) (2 ckpts)
- \[BACKBONE\] [ConvNeXt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext) (6 ckpts)
- \[ALGORITHM\] [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) (16 ckpts)
- \[ALGORITHM\] [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) (41 ckpts)
- \[ALGORITHM\] [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) (42 ckpts)
- \[ALGORITHM\] [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) (12 ckpts)
- \[ALGORITHM\] [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) (12 ckpts)
- \[ALGORITHM\] [DPT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dpt) (1 ckpts)
- \[ALGORITHM\] [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) (4 ckpts)
- \[ALGORITHM\] [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) (12 ckpts)
- \[ALGORITHM\] [ERFNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/erfnet) (1 ckpts)
- \[ALGORITHM\] [FastFCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn) (12 ckpts)
- \[ALGORITHM\] [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) (1 ckpts)
- \[ALGORITHM\] [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) (41 ckpts)
- \[ALGORITHM\] [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) (16 ckpts)
- \[BACKBONE\] [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) (37 ckpts)
- \[ALGORITHM\] [ICNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet) (12 ckpts)
- \[ALGORITHM\] [ISANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet) (16 ckpts)
- \[ALGORITHM\] [K-Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet) (7 ckpts)
- \[BACKBONE\] [MAE](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mae) (1 ckpts)
- \[ALGORITHM\] [Mask2Former](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mask2former) (13 ckpts)
- \[ALGORITHM\] [MaskFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/maskformer) (4 ckpts)
- \[BACKBONE\] [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) (8 ckpts)
- \[BACKBONE\] [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) (4 ckpts)
- \[ALGORITHM\] [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net) (16 ckpts)
- \[ALGORITHM\] [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) (24 ckpts)
- \[ALGORITHM\] [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) (4 ckpts)
- \[BACKBONE\] [PoolFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/poolformer) (5 ckpts)
- \[ALGORITHM\] [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) (16 ckpts)
- \[ALGORITHM\] [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) (54 ckpts)
- \[BACKBONE\] [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) (8 ckpts)
- \[ALGORITHM\] [SegFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer) (13 ckpts)
- \[ALGORITHM\] [Segmenter](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter) (5 ckpts)
- \[ALGORITHM\] [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn) (4 ckpts)
- \[ALGORITHM\] [SETR](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr) (7 ckpts)
- \[ALGORITHM\] [STDC](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc) (4 ckpts)
- \[BACKBONE\] [Swin Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin) (6 ckpts)
- \[BACKBONE\] [Twins](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins) (12 ckpts)
- \[ALGORITHM\] [UNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet) (25 ckpts)
- \[ALGORITHM\] [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) (16 ckpts)
- \[BACKBONE\] [Vision Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit) (11 ckpts)

View File

@ -1,406 +1,3 @@
## 准备数据集(待更新)
推荐用软链接, 将数据集根目录链接到 `$MMSEGMENTATION/data` 里. 如果您的文件夹结构是不同的, 您也许可以试着修改配置文件里对应的路径.
```none
mmsegmentation
├── mmseg
├── tools
├── configs
├── data
│ ├── cityscapes
│ │ ├── leftImg8bit
│ │ │ ├── train
│ │ │ ├── val
│ │ ├── gtFine
│ │ │ ├── train
│ │ │ ├── val
│ ├── VOCdevkit
│ │ ├── VOC2012
│ │ │ ├── JPEGImages
│ │ │ ├── SegmentationClass
│ │ │ ├── ImageSets
│ │ │ │ ├── Segmentation
│ │ ├── VOC2010
│ │ │ ├── JPEGImages
│ │ │ ├── SegmentationClassContext
│ │ │ ├── ImageSets
│ │ │ │ ├── SegmentationContext
│ │ │ │ │ ├── train.txt
│ │ │ │ │ ├── val.txt
│ │ │ ├── trainval_merged.json
│ │ ├── VOCaug
│ │ │ ├── dataset
│ │ │ │ ├── cls
│ ├── ade
│ │ ├── ADEChallengeData2016
│ │ │ ├── annotations
│ │ │ │ ├── training
│ │ │ │ ├── validation
│ │ │ ├── images
│ │ │ │ ├── training
│ │ │ │ ├── validation
│ ├── CHASE_DB1
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── DRIVE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── HRF
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
│ ├── STARE
│ │ ├── images
│ │ │ ├── training
│ │ │ ├── validation
│ │ ├── annotations
│ │ │ ├── training
│ │ │ ├── validation
| ├── dark_zurich
| │   ├── gps
| │   │   ├── val
| │   │   └── val_ref
| │   ├── gt
| │   │   └── val
| │   ├── LICENSE.txt
| │   ├── lists_file_names
| │   │   ├── val_filenames.txt
| │   │   └── val_ref_filenames.txt
| │   ├── README.md
| │   └── rgb_anon
| │   | ├── val
| │   | └── val_ref
| ├── NighttimeDrivingTest
| | ├── gtCoarse_daytime_trainvaltest
| | │   └── test
| | │   └── night
| | └── leftImg8bit
| | | └── test
| | | └── night
│ ├── loveDA
│ │ ├── img_dir
│ │ │ ├── train
│ │ │ ├── val
│ │ │ ├── test
│ │ ├── ann_dir
│ │ │ ├── train
│ │ │ ├── val
│ ├── potsdam
│ │ ├── img_dir
│ │ │ ├── train
│ │ │ ├── val
│ │ ├── ann_dir
│ │ │ ├── train
│ │ │ ├── val
│ ├── vaihingen
│ │ ├── img_dir
│ │ │ ├── train
│ │ │ ├── val
│ │ ├── ann_dir
│ │ │ ├── train
│ │ │ ├── val
│ ├── iSAID
│ │ ├── img_dir
│ │ │ ├── train
│ │ │ ├── val
│ │ │ ├── test
│ │ ├── ann_dir
│ │ │ ├── train
│ │ │ ├── val
│ ├── synapse
│ │ ├── img_dir
│ │ │ ├── train
│ │ │ ├── val
│ │ ├── ann_dir
│ │ │ ├── train
│ │ │ ├── val
```
### Cityscapes
注册成功后, 数据集可以在 [这里](https://www.cityscapes-dataset.com/downloads/) 下载.
通常情况下, `**labelTrainIds.png` 被用来训练 cityscapes.
基于 [cityscapesscripts](https://github.com/mcordts/cityscapesScripts),
我们提供了一个 [脚本](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/convert_datasets/cityscapes.py),
去生成 `**labelTrainIds.png`.
```shell
# --nproc 8 意味着有 8 个进程用来转换,它也可以被忽略.
python tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8
```
### Pascal VOC
Pascal VOC 2012 可以在 [这里](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) 下载.
此外, 许多最近在 Pascal VOC 数据集上的工作都会利用增广的数据, 它们可以在 [这里](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz) 找到.
如果您想使用增广后的 VOC 数据集, 请运行下面的命令来将数据增广的标注转成正确的格式.
```shell
# --nproc 8 意味着有 8 个进程用来转换,它也可以被忽略.
python tools/convert_datasets/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8
```
关于如何拼接数据集 (concatenate) 并一起训练它们, 更多细节请参考 [拼接数据集](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/tutorials/customize_datasets.md#%E6%8B%BC%E6%8E%A5%E6%95%B0%E6%8D%AE%E9%9B%86), 下方也给出了一个示例.
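As a rough illustration only, a sketch of what such a concatenation can look like in the 1.x config style (dataset paths, split files and the pipeline are placeholders, not a verified config):

```python
# Minimal training pipeline shared by both datasets (placeholder transforms).
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]

# Original VOC 2012 training split.
dataset_voc = dict(
    type='PascalVOCDataset',
    data_root='data/VOCdevkit/VOC2012',
    data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClass'),
    ann_file='ImageSets/Segmentation/train.txt',
    pipeline=train_pipeline)

# Augmented annotations converted by voc_aug.py.
dataset_aug = dict(
    type='PascalVOCDataset',
    data_root='data/VOCdevkit/VOC2012',
    data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClassAug'),
    ann_file='ImageSets/Segmentation/aug.txt',
    pipeline=train_pipeline)

# Train on the concatenation of both.
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    dataset=dict(type='ConcatDataset', datasets=[dataset_voc, dataset_aug]))
```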
### ADE20K
ADE20K 的训练集和验证集可以在 [这里](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip) 下载.
您还可以在 [这里](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip) 下载验证集.
### Pascal Context
Pascal Context 的训练集和验证集可以在 [这里](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar) 下载.
注册成功后, 您还可以在 [这里](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) 下载验证集.
为了从原始数据集里切分训练集和验证集, 您可以在 [这里](https://codalabuser.blob.core.windows.net/public/trainval_merged.json)
下载 trainval_merged.json.
如果您想使用 Pascal Context 数据集,
请安装 [细节](https://github.com/zhanghang1989/detail-api) 然后再运行如下命令来把标注转换成正确的格式.
```shell
python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
```
### CHASE DB1
CHASE DB1 的训练集和验证集可以在 [这里](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip) 下载.
为了将 CHASE DB1 数据集转换成 MMSegmentation 的格式,您需要运行如下命令:
```shell
python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip
```
这个脚本将自动生成正确的文件夹结构.
### DRIVE
DRIVE 的训练集和验证集可以在 [这里](https://drive.grand-challenge.org/) 下载.
在此之前, 您需要注册一个账号, 当前 '1st_manual' 并未被官方提供, 因此需要您从其他地方获取.
为了将 DRIVE 数据集转换成 MMSegmentation 格式, 您需要运行如下命令:
```shell
python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip
```
这个脚本将自动生成正确的文件夹结构.
### HRF
首先, 下载 [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip) [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) 以及 [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip).
为了将 HRF 数据集转换成 MMSegmentation 格式, 您需要运行如下命令:
```shell
python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip
```
这个脚本将自动生成正确的文件夹结构.
### STARE
首先, 下载 [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) 和 [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar).
为了将 STARE 数据集转换成 MMSegmentation 格式, 您需要运行如下命令:
```shell
python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar
```
这个脚本将自动生成正确的文件夹结构.
### Dark Zurich
因为我们只支持在此数据集上测试模型, 所以您只需下载[验证集](https://data.vision.ee.ethz.ch/csakarid/shared/GCMA_UIoU/Dark_Zurich_val_anon.zip).
### Nighttime Driving
因为我们只支持在此数据集上测试模型,所以您只需下载[测试集](http://data.vision.ee.ethz.ch/daid/NighttimeDriving/NighttimeDrivingTest.zip).
### LoveDA
可以从 Google Drive 里下载 [LoveDA数据集](https://drive.google.com/drive/folders/1ibYV0qwn4yuuh068Rnc-w4tPi0U0c-ti?usp=sharing).
或者它还可以从 [zenodo](https://zenodo.org/record/5706578#.YZvN7SYRXdF) 下载, 您需要运行如下命令:
```shell
# Download Train.zip
wget https://zenodo.org/record/5706578/files/Train.zip
# Download Val.zip
wget https://zenodo.org/record/5706578/files/Val.zip
# Download Test.zip
wget https://zenodo.org/record/5706578/files/Test.zip
```
对于 LoveDA 数据集,请运行以下命令下载并重新组织数据集:
```shell
python tools/convert_datasets/loveda.py /path/to/loveDA
```
请参照 [这里](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/inference.md) 来使用训练好的模型去预测 LoveDA 测试集并且提交到官网.
关于 LoveDA 的更多细节可以在[这里](https://github.com/Junjue-Wang/LoveDA) 找到.
### ISPRS Potsdam
[Potsdam](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-potsdam/)
数据集是一个有着2D 语义分割内容标注的城市遥感数据集.
数据集可以从挑战[主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/) 获得.
需要其中的 `2_Ortho_RGB.zip` 和 `5_Labels_all_noBoundary.zip`.
对于 Potsdam 数据集,请运行以下命令下载并重新组织数据集
```shell
python tools/convert_datasets/potsdam.py /path/to/potsdam
```
使用我们默认的配置, 将生成 3,456 张图片的训练集和 2,016 张图片的验证集.
### ISPRS Vaihingen
[Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/)
数据集是一个有着2D 语义分割内容标注的城市遥感数据集.
数据集可以从挑战 [主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/) 获取.
需要其中的 'ISPRS_semantic_labeling_Vaihingen.zip' 和 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip'.
对于 Vaihingen 数据集, 请运行以下命令下载并重新组织数据集
```shell
python tools/convert_datasets/vaihingen.py /path/to/vaihingen
```
使用我们默认的配置 (`clip_size`=512, `stride_size`=256), 将生成 344 张图片的训练集和 398 张图片的验证集.
### iSAID
iSAID 数据集(训练集/验证集/测试集)的图像可以从 [DOTA-v1.0](https://captain-whu.github.io/DOTA/dataset.html) 下载.
iSAID 数据集(训练集/验证集)的注释可以从 [iSAID](https://captain-whu.github.io/iSAID/dataset.html) 下载.
该数据集是一个大规模的实例分割(也可以用于语义分割)的遥感数据集.
下载后, 在数据集转换前, 您需要将数据集文件夹调整成如下格式.
```
│ ├── iSAID
│ │ ├── train
│ │ │ ├── images
│ │ │ │ ├── part1.zip
│ │ │ │ ├── part2.zip
│ │ │ │ ├── part3.zip
│ │ │ ├── Semantic_masks
│ │ │ │ ├── images.zip
│ │ ├── val
│ │ │ ├── images
│ │ │ │ ├── part1.zip
│ │ │ ├── Semantic_masks
│ │ │ │ ├── images.zip
│ │ ├── test
│ │ │ ├── images
│ │ │ │ ├── part1.zip
│ │ │ │ ├── part2.zip
```
```shell
python tools/convert_datasets/isaid.py /path/to/iSAID
```
使用我们默认的配置 (`patch_width`=896, `patch_height`=896, `overlap_area`=384), 将生成 33,978 张图片的训练集和 11,644 张图片的验证集.
## Synapse dataset
这个数据集可以在这个[网页](https://www.synapse.org/#!Synapse:syn3193805/wiki/) 里被下载.
我们参考了 [TransUNet](https://arxiv.org/abs/2102.04306) 里面的数据集预处理的设置, 它将原始数据集 (30 套 3D 样例) 切分出 18 套用于训练, 12 套用于验证. 请参考以下步骤来准备该数据集:
```shell
unzip RawData.zip
cd ./RawData/Training
```
随后新建 `train.txt` 和 `val.txt`.
根据 TransUNet 来将训练集和验证集如下划分:
train.txt
```none
img0005.nii.gz
img0006.nii.gz
img0007.nii.gz
img0009.nii.gz
img0010.nii.gz
img0021.nii.gz
img0023.nii.gz
img0024.nii.gz
img0026.nii.gz
img0027.nii.gz
img0028.nii.gz
img0030.nii.gz
img0031.nii.gz
img0033.nii.gz
img0034.nii.gz
img0037.nii.gz
img0039.nii.gz
img0040.nii.gz
```
val.txt
```none
img0008.nii.gz
img0022.nii.gz
img0038.nii.gz
img0036.nii.gz
img0032.nii.gz
img0002.nii.gz
img0029.nii.gz
img0003.nii.gz
img0001.nii.gz
img0004.nii.gz
img0025.nii.gz
img0035.nii.gz
```
此时, synapse 数据集包括了以下内容:
```none
├── Training
│ ├── img
│ │ ├── img0001.nii.gz
│ │ ├── img0002.nii.gz
│ │ ├── ...
│ ├── label
│ │ ├── label0001.nii.gz
│ │ ├── label0002.nii.gz
│ │ ├── ...
│ ├── train.txt
│ ├── val.txt
```
随后, 运行下面的数据集转换脚本来处理 synapse 数据集:
```shell
python tools/dataset_converters/synapse.py --dataset-path /path/to/synapse
```
使用我们默认的配置, 将生成 2,211 张 2D 图片的训练集和 1,568 张图片的验证集.
需要注意的是 MMSegmentation 默认的评价指标 (例如平均 Dice 值) 都是基于每帧 2D 图片计算的, 这与基于每套 3D 图片计算评价指标的 [TransUNet](https://arxiv.org/abs/2102.04306) 是不同的.
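A toy sketch of the general difference between evaluating per 2D slice and per 3D case (synthetic binary masks; this is not the exact implementation of either codebase):

```python
import numpy as np

np.random.seed(0)

def dice(pred: np.ndarray, gt: np.ndarray, eps: float = 1e-6) -> float:
    """Binary Dice coefficient for a single foreground class."""
    inter = np.logical_and(pred, gt).sum()
    return (2 * inter + eps) / (pred.sum() + gt.sum() + eps)

# Four toy 2D slices belonging to the same 3D case.
slices_pred = [np.random.rand(256, 256) > 0.5 for _ in range(4)]
slices_gt = [np.random.rand(256, 256) > 0.5 for _ in range(4)]

# Slice-wise: average the per-slice scores (2D-image-based evaluation).
slice_wise = np.mean([dice(p, g) for p, g in zip(slices_pred, slices_gt)])
# Case-wise: stack the slices into one volume, then score once (3D-case-based).
case_wise = dice(np.stack(slices_pred), np.stack(slices_gt))
print(slice_wise, case_wise)  # the two numbers are generally not identical
```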
中文版文档支持中,请先阅读[英文版本](../../en/user_guides/2_dataset_prepare.md)

View File

@ -1,127 +1,3 @@
## 使用预训练模型推理(待更新)
我们提供测试脚本来评估完整数据集Cityscapes, PASCAL VOC, ADE20k 等)上的结果,同时为了使其他项目的整合更容易,也提供一些高级 API。
### 测试一个数据集
- 单卡 GPU
- CPU
- 单节点多卡 GPU
- 多节点
您可以使用以下命令来测试一个数据集。
```shell
# 单卡 GPU 测试
python tools/test.py ${配置文件} ${检查点文件} [--out ${结果文件}] [--eval ${评估指标}] [--show]
# CPU: 如果机器没有 GPU, 则跟上述单卡 GPU 测试一致
# CPU: 如果机器有 GPU, 那么先禁用 GPU 再运行单 GPU 测试脚本
export CUDA_VISIBLE_DEVICES=-1 # 禁用 GPU
python tools/test.py ${配置文件} ${检查点文件} [--out ${结果文件}] [--eval ${评估指标}] [--show]
# 多卡GPU 测试
./tools/dist_test.sh ${配置文件} ${检查点文件} ${GPU数目} [--out ${结果文件}] [--eval ${评估指标}]
```
可选参数:
- `RESULT_FILE`: pickle 格式的输出结果的文件名,如果不专门指定,结果将不会被专门保存成文件。(MMseg v0.17 之后,args.out 将只会保存评估时的中间结果或者是分割图的保存路径。)
- `EVAL_METRICS`: 在结果里将被评估的指标。这主要取决于数据集, `mIoU` 对于所有数据集都可获得,像 Cityscapes 数据集可以通过 `cityscapes` 命令来专门评估,就像标准的 `mIoU`一样。
- `--show`: 如果被指定,分割结果将会在一张图像里画出来并且在另一个窗口展示。它仅仅是用来调试与可视化,并且仅针对单卡 GPU 测试。请确认 GUI 在您的环境里可用,否则您也许会遇到报错 `cannot connect to X server`
- `--show-dir`: 如果被指定分割结果将会在一张图像里画出来并且保存在指定文件夹里。它仅仅是用来调试与可视化并且仅针对单卡GPU测试。使用该参数时您的环境不需要 GUI。
- `--eval-options`: 评估时的可选参数,当设置 `efficient_test=True` 时,它将会保存中间结果至本地文件里以节约 CPU 内存。请确认您本地硬盘有足够的存储空间(大于 20GB)。(MMseg v0.17 之后,`efficient_test` 不再生效,我们重构了 test api,通过使用一种渐近式的方式来提升评估和保存结果的效率。)
例子:
假设您已经下载检查点文件至文件夹 `checkpoints/` 里。
1. 测试 PSPNet 并可视化结果。按下任何键会进行到下一张图
```shell
python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
--show
```
2. 测试 PSPNet 并保存画出的图以便于之后的可视化
```shell
python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
--show-dir psp_r50_512x1024_40ki_cityscapes_results
```
3. 在数据集 PASCAL VOC (不保存测试结果) 上测试 PSPNet 并评估 mIoU
```shell
python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_20k_voc12aug.py \
checkpoints/pspnet_r50-d8_512x1024_20k_voc12aug_20200605_003338-c57ef100.pth \
--eval mIoU
```
4. 使用4卡 GPU 测试 PSPNet并且在标准 mIoU 和 cityscapes 指标里评估模型
```shell
./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
4 --out results.pkl --eval mIoU cityscapes
```
注意:在 cityscapes mIoU 和我们的 mIoU 指标会有一些差异 (~0.1%) 。因为 cityscapes 默认是根据类别样本数的多少进行加权平均,而我们对所有的数据集都是采取直接平均的方法来得到 mIoU。
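A toy numerical illustration of why the two averages can drift apart (made-up per-class IoUs and pixel counts, not real cityscapes numbers):

```python
import numpy as np

# Per-class IoU and per-class pixel count for three imaginary classes.
ious = np.array([0.9, 0.7, 0.5])
pixels = np.array([1_000_000, 50_000, 5_000])

simple_mean = ious.mean()                                    # unweighted mean over classes
weighted_mean = float((ious * pixels).sum() / pixels.sum())  # frequency-weighted mean
print(simple_mean, weighted_mean)  # a small gap between the two summaries
```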
5. 在 cityscapes 数据集上4卡 GPU 测试 PSPNet 并生成 png 文件以便提交给官方评估服务器
首先,在配置文件里添加内容: `configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py`
```python
data = dict(
test=dict(
img_dir='leftImg8bit/test',
ann_dir='gtFine/test'))
```
随后,进行测试。
```shell
./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
4 --format-only --eval-options "imgfile_prefix=./pspnet_test_results"
```
您会在文件夹 `./pspnet_test_results` 里得到生成的 png 文件。
您也许可以运行 `zip -r results.zip pspnet_test_results/` 并提交 zip 文件给 [evaluation server](https://www.cityscapes-dataset.com/submit/) 。
6. 在 Cityscapes 数据集上使用 CPU 高效内存选项来测试 DeeplabV3+ `mIoU` 指标 (没有保存测试结果)
```shell
python tools/test.py \
configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py \
deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth \
--eval-options efficient_test=True \
--eval mIoU
```
使用 `pmap` 可查看 CPU 内存情况, `efficient_test=True` 会使用约 2.25GB 的 CPU 内存, `efficient_test=False` 会使用约 11.06GB 的 CPU 内存。这个可选参数可以节约很多 CPU 内存。(MMseg v0.17 之后, `efficient_test` 参数将不再生效, 我们使用了一种渐近的方式来更加有效快速地评估和保存结果。)
7. 在 LoveDA 数据集上1卡 GPU 测试 PSPNet 并生成 png 文件以便提交给官方评估服务器
首先,在配置文件里添加内容: `configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py`,
```python
data = dict(
test=dict(
img_dir='img_dir/test',
ann_dir='ann_dir/test'))
```
随后,进行测试。
```shell
python ./tools/test.py configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py \
checkpoints/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth \
--format-only --eval-options "imgfile_prefix=./pspnet_test_results"
```
您会在文件夹 `./pspnet_test_results` 里得到生成的 png 文件。
您也许可以运行 `zip -r -j Results.zip pspnet_test_results/` 并提交 zip 文件给 [evaluation server](https://codalab.lisn.upsaclay.fr/competitions/421) 。
中文版文档支持中,请先阅读[英文版本](../../en/user_guides/3_inference.md)

View File

@ -43,7 +43,7 @@ python tools/train.py ${配置文件} --resume --cfg-options load_from=${检查
export CUDA_VISIBLE_DEVICES=-1
```
然后运行[上方](#在单GPU上训练)脚本。
然后运行[上方](###在单GPU上训练)脚本。
### 在单GPU上测试
@ -69,7 +69,7 @@ python tools/test.py ${配置文件} ${模型权重文件} [可选参数]
export CUDA_VISIBLE_DEVICES=-1
```
然后运行[上方](#在单GPU上测试)脚本。
然后运行[上方](###在单GPU上测试)脚本。
## 多GPU、多机器上训练和测试
@ -85,7 +85,7 @@ OpenMMLab2.0 通过 `MMDistributedDataParallel`实现 **分布式** 训练。
sh tools/dist_train.sh ${配置文件} ${GPU数量} [可选参数]
```
可选参数与[上方](#在单GPU上训练)相同并且还增加了可以指定gpu数量的参数。
可选参数与[上方](###在单GPU上训练)相同并且还增加了可以指定gpu数量的参数。
示例:
@ -112,7 +112,7 @@ ln -s ${您的工作路径} ${MMSEG 路径}/work_dirs
sh tools/dist_test.sh ${配置文件} ${检查点文件} ${GPU数量} [可选参数]
```
可选参数与[上方](#在单GPU上测试)相同并且增加了可以指定 gpu 数量的参数。
可选参数与[上方](###在单GPU上测试)相同并且增加了可以指定 gpu 数量的参数。
示例:

View File

@ -18,3 +18,4 @@
visualization.md
useful_tools.md
deployment.md
visualization_feature_map.md

View File

@ -69,7 +69,7 @@ default_hooks = dict(
work_dirs/test_visual/20220810_115248/vis_data/vis_image
```
另外,如果在 `vis_backends` 中添加 `TensorboardVisBackend` ,如 [TensorBoard 的配置](#tensorboard-configuration),我们还可以运行下面的命令在 TensorBoard 中查看它们:
另外,如果在 `vis_backends` 中添加 `TensorboardVisBackend` ,如 [TensorBoard 的配置](###TensorBoard的配置),我们还可以运行下面的命令在 TensorBoard 中查看它们:
```shell
tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data

View File

@ -0,0 +1,201 @@
# wandb记录特征图可视化
MMSegmentation 1.x 提供了 Weights & Biases 的后端支持,方便对项目代码结果的可视化和管理。
## Wandb的配置
安装 Weights & Biases 的过程可以参考 [官方安装指南](https://docs.wandb.ai/quickstart),具体的步骤如下:
```shell
pip install wandb
wandb login
```
`vis_backend` 中添加 `WandbVisBackend`
```python
vis_backends=[dict(type='LocalVisBackend'),
dict(type='TensorboardVisBackend'),
dict(type='WandbVisBackend')]
```
## 测试数据和结果及特征图的可视化
`SegLocalVisualizer` 是继承自 MMEngine 中 `Visualizer` 类的子类,适用于 MMSegmentation 可视化,有关 `Visualizer` 的详细信息请参考在 MMEngine 中的[可视化教程](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/visualization.html) 。
以下是一个关于 `SegLocalVisualizer` 的示例,首先你可以使用下面的命令下载这个案例中的数据:
<div align=center>
<img src="https://user-images.githubusercontent.com/24582831/189833109-eddad58f-f777-4fc0-b98a-6bd429143b06.png" width="70%"/>
</div>
```shell
wget https://user-images.githubusercontent.com/24582831/189833109-eddad58f-f777-4fc0-b98a-6bd429143b06.png --output-document aachen_000000_000019_leftImg8bit.png
wget https://user-images.githubusercontent.com/24582831/189833143-15f60f8a-4d1e-4cbb-a6e7-5e2233869fac.png --output-document aachen_000000_000019_gtFine_labelTrainIds.png
wget https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth
```
```python
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from typing import Type
import mmcv
import torch
import torch.nn as nn
from mmengine.model import revert_sync_batchnorm
from mmengine.structures import PixelData
from mmseg.apis import inference_model, init_model
from mmseg.structures import SegDataSample
from mmseg.utils import register_all_modules
from mmseg.visualization import SegLocalVisualizer
class Recorder:
"""record the forward output feature map and save to data_buffer."""
def __init__(self) -> None:
self.data_buffer = list()
def __enter__(self):
self.data_buffer = list()
def record_data_hook(self, model: nn.Module, input: Type, output: Type):
self.data_buffer.append(output)
def __exit__(self, *args, **kwargs):
pass
def visualize(args, model, recorder, result):
seg_visualizer = SegLocalVisualizer(
vis_backends=[dict(type='WandbVisBackend')],
save_dir='temp_dir',
alpha=0.5)
seg_visualizer.dataset_meta = dict(
classes=model.dataset_meta['classes'],
palette=model.dataset_meta['palette'])
image = mmcv.imread(args.img, 'color')
seg_visualizer.add_datasample(
name='predict',
image=image,
data_sample=result,
draw_gt=False,
draw_pred=True,
wait_time=0,
out_file=None,
show=False)
# add feature map to wandb visualizer
for i in range(len(recorder.data_buffer)):
feature = recorder.data_buffer[i][0] # remove the batch
drawn_img = seg_visualizer.draw_featmap(
feature, image, channel_reduction='select_max')
seg_visualizer.add_image(f'feature_map{i}', drawn_img)
if args.gt_mask:
sem_seg = mmcv.imread(args.gt_mask, 'unchanged')
sem_seg = torch.from_numpy(sem_seg)
gt_mask = dict(data=sem_seg)
gt_mask = PixelData(**gt_mask)
data_sample = SegDataSample()
data_sample.gt_sem_seg = gt_mask
seg_visualizer.add_datasample(
name='gt_mask',
image=image,
data_sample=data_sample,
draw_gt=True,
draw_pred=False,
wait_time=0,
out_file=None,
show=False)
seg_visualizer.add_image('image', image)
def main():
parser = ArgumentParser(
description='Draw the Feature Map During Inference')
parser.add_argument('img', help='Image file')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument('--gt_mask', default=None, help='Path of gt mask file')
parser.add_argument('--out-file', default=None, help='Path to output file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--opacity',
type=float,
default=0.5,
help='Opacity of painted segmentation map. In (0, 1] range.')
parser.add_argument(
'--title', default='result', help='The image identifier.')
args = parser.parse_args()
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
if args.device == 'cpu':
model = revert_sync_batchnorm(model)
# show all named module in the model and use it in source list below
for name, module in model.named_modules():
print(name)
source = [
'decode_head.fusion.stages.0.query_project.activate',
'decode_head.context.stages.0.key_project.activate',
'decode_head.context.bottleneck.activate'
]
source = dict.fromkeys(source)
count = 0
recorder = Recorder()
# register the forward hook
for name, module in model.named_modules():
if name in source:
count += 1
module.register_forward_hook(recorder.record_data_hook)
if count == len(source):
break
with recorder:
# test a single image, and record feature map to data_buffer
result = inference_model(model, args.img)
visualize(args, model, recorder, result)
if __name__ == '__main__':
main()
```
将上述代码保存为 feature_map_visual.py在终端执行如下代码
```shell
python feature_map_visual.py ${图像} ${配置文件} ${检查点文件} [可选参数]
```
样例
```shell
python feature_map_visual.py \
aachen_000000_000019_leftImg8bit.png \
configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py \
ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth \
--gt_mask aachen_000000_000019_gtFine_labelTrainIds.png
```
可视化后的图像结果和对应的 feature map 图像会出现在 wandb 账户中
<div align=center>
<img src="https://user-images.githubusercontent.com/76149310/217520321-647f5bf9-eef2-446d-a9e8-5ca7b621d500.png">
</div>

View File

@ -9,7 +9,7 @@ from .version import __version__, version_info
MMCV_MIN = '2.0.0rc4'
MMCV_MAX = '2.1.0'
MMENGINE_MIN = '0.2.0'
MMENGINE_MIN = '0.5.0'
MMENGINE_MAX = '1.0.0'

View File

@ -1,4 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .inference import inference_model, init_model, show_result_pyplot
from .mmseg_inferencer import MMSegInferencer
__all__ = ['init_model', 'inference_model', 'show_result_pyplot']
__all__ = [
'init_model', 'inference_model', 'show_result_pyplot', 'MMSegInferencer'
]

View File

@ -9,6 +9,7 @@ import numpy as np
import torch
from mmengine import Config
from mmengine.dataset import Compose
from mmengine.registry import init_default_scope
from mmengine.runner import load_checkpoint
from mmengine.utils import mkdir_or_exist
@ -48,6 +49,8 @@ def init_model(config: Union[str, Path, Config],
config.model.backbone.init_cfg = None
config.model.pretrained = None
config.model.train_cfg = None
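# Pin the default registry scope (falling back to 'mmseg' when the config does
# not define one) before building the model, so the config types resolve correctly.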
init_default_scope(config.get('default_scope', 'mmseg'))
model = MODELS.build(config.model)
if checkpoint is not None:
checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')

View File

@ -0,0 +1,361 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import warnings
from typing import List, Optional, Sequence, Union
import mmcv
import mmengine
import numpy as np
import torch
import torch.nn as nn
from mmcv.transforms import Compose
from mmengine.infer.infer import BaseInferencer, ModelType
from mmengine.model import revert_sync_batchnorm
from mmengine.registry import init_default_scope
from mmengine.runner.checkpoint import _load_checkpoint_to_model
from PIL import Image
from mmseg.structures import SegDataSample
from mmseg.utils import ConfigType, SampleList, get_classes, get_palette
from mmseg.visualization import SegLocalVisualizer
InputType = Union[str, np.ndarray]
InputsType = Union[InputType, Sequence[InputType]]
PredType = Union[SegDataSample, SampleList]
class MMSegInferencer(BaseInferencer):
"""Semantic segmentation inferencer, provides inference and visualization
interfaces. Note: MMEngine >= 0.5.0 is required.
Args:
model (str, optional): Path to the config file or the model name
defined in metafile. Take the `mmseg metafile <https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn.yml>`_
as an example the `model` could be
"fcn_r50-d8_4xb2-40k_cityscapes-512x1024", and the weights of model
will be downloaded automatically. If a config file is used, like
"configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py", the
`weights` should be defined.
weights (str, optional): Path to the checkpoint. If it is not specified
and model is a model name of metafile, the weights will be loaded
from metafile. Defaults to None.
classes (list, optional): Input classes for result rendering, as the
prediction of segmentation model is a segment map with label
indices, `classes` is a list which includes items corresponding to the
label indices. If classes is not defined, visualizer will take
`cityscapes` classes by default. Defaults to None.
palette (list, optional): Input palette for result rendering, which is
a list of color palette responding to the classes. If palette is
not defined, visualizer will take `cityscapes` palette by default.
Defaults to None.
dataset_name (str, optional): `Dataset name or alias <https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/class_names.py#L302-L317>`_
visualizer will use the meta information of the dataset i.e. classes
and palette, but the `classes` and `palette` have higher priority.
Defaults to None.
device (str, optional): Device to run inference. If None, the available
device will be automatically used. Defaults to None.
scope (str, optional): The scope of the model. Defaults to 'mmseg'.
""" # noqa
preprocess_kwargs: set = set()
forward_kwargs: set = {'mode', 'out_dir'}
visualize_kwargs: set = {'show', 'wait_time', 'img_out_dir', 'opacity'}
postprocess_kwargs: set = {'pred_out_dir', 'return_datasample'}
def __init__(self,
model: Union[ModelType, str],
weights: Optional[str] = None,
classes: Optional[Union[str, List]] = None,
palette: Optional[Union[str, List]] = None,
dataset_name: Optional[str] = None,
device: Optional[str] = None,
scope: Optional[str] = 'mmseg') -> None:
# A global counter tracking the number of images processed, for
# naming of the output images
self.num_visualized_imgs = 0
self.num_pred_imgs = 0
init_default_scope(scope if scope else 'mmseg')
super().__init__(
model=model, weights=weights, device=device, scope=scope)
if device == 'cpu' or not torch.cuda.is_available():
self.model = revert_sync_batchnorm(self.model)
assert isinstance(self.visualizer, SegLocalVisualizer)
self.visualizer.set_dataset_meta(palette, classes, dataset_name)
def _load_weights_to_model(self, model: nn.Module,
checkpoint: Optional[dict],
cfg: Optional[ConfigType]) -> None:
"""Loading model weights and meta information from cfg and checkpoint.
Subclasses could override this method to load extra meta information
from ``checkpoint`` and ``cfg`` to model.
Args:
model (nn.Module): Model to load weights and meta information.
checkpoint (dict, optional): The loaded checkpoint.
cfg (Config or ConfigDict, optional): The loaded config.
"""
if checkpoint is not None:
_load_checkpoint_to_model(model, checkpoint)
checkpoint_meta = checkpoint.get('meta', {})
# save the dataset_meta in the model for convenience
if 'dataset_meta' in checkpoint_meta:
# mmsegmentation 1.x
model.dataset_meta = {
'classes': checkpoint_meta['dataset_meta'].get('classes'),
'palette': checkpoint_meta['dataset_meta'].get('palette')
}
elif 'CLASSES' in checkpoint_meta:
# mmsegmentation 0.x
classes = checkpoint_meta['CLASSES']
palette = checkpoint_meta.get('PALETTE', None)
model.dataset_meta = {'classes': classes, 'palette': palette}
else:
warnings.warn(
'dataset_meta or class names are not saved in the '
'checkpoint\'s meta data, use classes of Cityscapes by '
'default.')
model.dataset_meta = {
'classes': get_classes('cityscapes'),
'palette': get_palette('cityscapes')
}
else:
warnings.warn('Checkpoint is not loaded, and the inference '
'result is calculated by the randomly initialized '
'model!')
warnings.warn(
'weights is None, use cityscapes classes by default.')
model.dataset_meta = {
'classes': get_classes('cityscapes'),
'palette': get_palette('cityscapes')
}
def __call__(self,
inputs: InputsType,
return_datasamples: bool = False,
batch_size: int = 1,
show: bool = False,
wait_time: int = 0,
out_dir: str = '',
img_out_dir: str = 'vis',
pred_out_dir: str = 'pred',
**kwargs) -> dict:
"""Call the inferencer.
Args:
inputs (Union[list, str, np.ndarray]): Inputs for the inferencer.
return_datasamples (bool): Whether to return results as
:obj:`SegDataSample`. Defaults to False.
batch_size (int): Batch size. Defaults to 1.
show (bool): Whether to display the rendering color segmentation
mask in a popup window. Defaults to False.
wait_time (float): The interval of show (s). Defaults to 0.
out_dir (str): Output directory of inference results. Defaults
to ''.
img_out_dir (str): Subdirectory of `out_dir`, used to save the
rendered color segmentation masks, so `out_dir` must be defined
if you would like to save them. Defaults to 'vis'.
pred_out_dir (str): Subdirectory of `out_dir`, used to save the
predicted mask files, so `out_dir` must be defined if you would
like to save them. Defaults to 'pred'.
**kwargs: Other keyword arguments passed to :meth:`preprocess`,
:meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
Each key in kwargs should be in the corresponding set of
``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
and ``postprocess_kwargs``.
Returns:
dict: Inference and visualization results.
"""
if out_dir != '':
pred_out_dir = osp.join(out_dir, pred_out_dir)
img_out_dir = osp.join(out_dir, img_out_dir)
else:
pred_out_dir = ''
img_out_dir = ''
return super().__call__(
inputs=inputs,
return_datasamples=return_datasamples,
batch_size=batch_size,
show=show,
wait_time=wait_time,
img_out_dir=img_out_dir,
pred_out_dir=pred_out_dir,
**kwargs)
def visualize(self,
inputs: list,
preds: List[dict],
show: bool = False,
wait_time: int = 0,
img_out_dir: str = '',
opacity: float = 0.8) -> List[np.ndarray]:
"""Visualize predictions.
Args:
inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`.
preds (Any): Predictions of the model.
show (bool): Whether to display the image in a popup window.
Defaults to False.
wait_time (float): The interval of show (s). Defaults to 0.
img_out_dir (str): Output directory for the rendered predictions, i.e.
the color segmentation masks. Defaults to ''.
opacity (int, float): The transparency of segmentation mask.
Defaults to 0.8.
Returns:
List[np.ndarray]: Visualization results.
"""
if self.visualizer is None or (not show and img_out_dir == ''):
return None
if getattr(self, 'visualizer') is None:
raise ValueError('Visualization needs the "visualizer" term'
'defined in the config, but got None')
self.visualizer.set_dataset_meta(**self.model.dataset_meta)
self.visualizer.alpha = opacity
results = []
for single_input, pred in zip(inputs, preds):
if isinstance(single_input, str):
img_bytes = mmengine.fileio.get(single_input)
img = mmcv.imfrombytes(img_bytes)
img = img[:, :, ::-1]
img_name = osp.basename(single_input)
elif isinstance(single_input, np.ndarray):
img = single_input.copy()
img_num = str(self.num_visualized_imgs).zfill(8) + '_vis'
img_name = f'{img_num}.jpg'
else:
raise ValueError('Unsupported input type:'
f'{type(single_input)}')
out_file = osp.join(img_out_dir, img_name) if img_out_dir != ''\
else None
self.visualizer.add_datasample(
img_name,
img,
pred,
show=show,
wait_time=wait_time,
draw_gt=False,
draw_pred=True,
out_file=out_file)
results.append(self.visualizer.get_image())
self.num_visualized_imgs += 1
return results
def postprocess(self,
preds: PredType,
visualization: List[np.ndarray],
return_datasample: bool = False,
pred_out_dir: str = '') -> dict:
"""Process the predictions and visualization results from ``forward``
and ``visualize``.
This method should be responsible for the following tasks:
1. Pack the predictions and visualization results and return them.
2. Save the predictions, if it needed.
Args:
preds (List[Dict]): Predictions of the model.
visualization (List[np.ndarray]): The list of rendered color
segmentation masks.
return_datasample (bool): Whether to return results as datasamples.
Defaults to False.
pred_out_dir: File to save the inference results w/o
visualization. If left as empty, no file will be saved.
Defaults to ''.
Returns:
dict: Inference and visualization results with key ``predictions``
and ``visualization``
- ``visualization (Any)``: Returned by :meth:`visualize`
- ``predictions`` (List[np.ndarray], np.ndarray): Returned by
:meth:`forward` and processed in :meth:`postprocess`.
If ``return_datasample=False``, it will be the segmentation mask
with label indices.
"""
if return_datasample:
if len(preds) == 1:
return preds[0]
else:
return preds
results_dict = {}
results_dict['predictions'] = []
results_dict['visualization'] = []
for i, pred in enumerate(preds):
pred_data = pred.pred_sem_seg.numpy().data[0]
results_dict['predictions'].append(pred_data)
if visualization is not None:
vis = visualization[i]
results_dict['visualization'].append(vis)
if pred_out_dir != '':
mmengine.mkdir_or_exist(pred_out_dir)
img_name = str(self.num_pred_imgs).zfill(8) + '_pred.png'
img_path = osp.join(pred_out_dir, img_name)
output = Image.fromarray(pred_data.astype(np.uint8))
output.save(img_path)
self.num_pred_imgs += 1
if len(results_dict['predictions']) == 1:
results_dict['predictions'] = results_dict['predictions'][0]
if visualization is not None:
results_dict['visualization'] = \
results_dict['visualization'][0]
return results_dict
def _init_pipeline(self, cfg: ConfigType) -> Compose:
"""Initialize the test pipeline.
Return a pipeline to handle various input data, such as ``str``,
``np.ndarray``. It is an abstract method in BaseInferencer, and should
be implemented in subclasses.
The returned pipeline will be used to process a single data.
It will be used in :meth:`preprocess` like this:
.. code-block:: python
def preprocess(self, inputs, batch_size, **kwargs):
...
dataset = map(self.pipeline, dataset)
...
"""
pipeline_cfg = cfg.test_dataloader.dataset.pipeline
# Loading annotations is also not applicable
idx = self._get_transform_idx(pipeline_cfg, 'LoadAnnotations')
if idx != -1:
del pipeline_cfg[idx]
load_img_idx = self._get_transform_idx(pipeline_cfg,
'LoadImageFromFile')
if load_img_idx == -1:
raise ValueError(
'LoadImageFromFile is not found in the test pipeline')
pipeline_cfg[load_img_idx]['type'] = 'InferencerLoader'
return Compose(pipeline_cfg)
def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int:
"""Returns the index of the transform in a pipeline.
If the transform is not found, returns -1.
"""
for i, transform in enumerate(pipeline_cfg):
if transform['type'] == name:
return i
return -1
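A minimal usage sketch of the inferencer defined above (the model alias is the one named in the class docstring; the input image path is illustrative):

```python
from mmseg.apis import MMSegInferencer

# Metafile alias; the matching checkpoint is downloaded automatically.
inferencer = MMSegInferencer(model='fcn_r50-d8_4xb2-40k_cityscapes-512x1024')

# Rendered masks are written to ./out/vis and raw label maps to ./out/pred.
results = inferencer('demo/demo.png', out_dir='./out')
print(results['predictions'].shape)  # (H, W) array of label indices for one input
```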

View File

@ -17,6 +17,7 @@ from .loveda import LoveDADataset
from .night_driving import NightDrivingDataset
from .pascal_context import PascalContextDataset, PascalContextDataset59
from .potsdam import PotsdamDataset
from .refuge import REFUGEDataset
from .stare import STAREDataset
from .synapse import SynapseDataset
# yapf: disable
@ -48,5 +49,5 @@ __all__ = [
'DecathlonDataset', 'LIPDataset', 'ResizeShortestEdge',
'BioMedicalGaussianNoise', 'BioMedicalGaussianBlur',
'BioMedicalRandomGamma', 'BioMedical3DPad', 'RandomRotFlip',
'SynapseDataset'
'SynapseDataset', 'REFUGEDataset'
]

View File

@ -73,38 +73,36 @@ class BaseSegDataset(BaseDataset):
ignore_index (int): The label index to be ignored. Default: 255
reduce_zero_label (bool): Whether to mark label zero as ignored.
Default to False.
backend_args (dict): Arguments to instantiate a file backend.
backend_args (dict, Optional): Arguments to instantiate a file backend.
See https://mmengine.readthedocs.io/en/latest/api/fileio.htm
for details. Defaults to ``dict(backend='local')``
for details. Defaults to None.
Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
"""
METAINFO: dict = dict()
def __init__(
self,
ann_file: str = '',
img_suffix='.jpg',
seg_map_suffix='.png',
metainfo: Optional[dict] = None,
data_root: Optional[str] = None,
data_prefix: dict = dict(img_path='', seg_map_path=''),
filter_cfg: Optional[dict] = None,
indices: Optional[Union[int, Sequence[int]]] = None,
serialize_data: bool = True,
pipeline: List[Union[dict, Callable]] = [],
test_mode: bool = False,
lazy_init: bool = False,
max_refetch: int = 1000,
ignore_index: int = 255,
reduce_zero_label: bool = False,
backend_args: dict = dict(backend='local')
) -> None:
def __init__(self,
ann_file: str = '',
img_suffix='.jpg',
seg_map_suffix='.png',
metainfo: Optional[dict] = None,
data_root: Optional[str] = None,
data_prefix: dict = dict(img_path='', seg_map_path=''),
filter_cfg: Optional[dict] = None,
indices: Optional[Union[int, Sequence[int]]] = None,
serialize_data: bool = True,
pipeline: List[Union[dict, Callable]] = [],
test_mode: bool = False,
lazy_init: bool = False,
max_refetch: int = 1000,
ignore_index: int = 255,
reduce_zero_label: bool = False,
backend_args: Optional[dict] = None) -> None:
self.img_suffix = img_suffix
self.seg_map_suffix = seg_map_suffix
self.ignore_index = ignore_index
self.reduce_zero_label = reduce_zero_label
self.backend_args = backend_args.copy()
self.backend_args = backend_args.copy() if backend_args else None
self.data_root = data_root
self.data_prefix = copy.copy(data_prefix)

28
mmseg/datasets/refuge.py Normal file
View File

@ -0,0 +1,28 @@
# Copyright (c) OpenMMLab. All rights reserved.
import mmengine.fileio as fileio
from mmseg.registry import DATASETS
from .basesegdataset import BaseSegDataset
@DATASETS.register_module()
class REFUGEDataset(BaseSegDataset):
"""REFUGE dataset.
In the segmentation map annotation for REFUGE, 0 stands for background,
which is included in the 3 categories, and ``reduce_zero_label`` is fixed to False.
The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
'.png'.
"""
METAINFO = dict(
classes=('background', 'Optic Cup', 'Optic Disc'),
palette=[[120, 120, 120], [6, 230, 230], [56, 59, 120]])
def __init__(self, **kwargs) -> None:
super().__init__(
img_suffix='.png',
seg_map_suffix='.png',
reduce_zero_label=False,
**kwargs)
assert fileio.exists(
self.data_prefix['img_path'], backend_args=self.backend_args)

View File

@ -63,8 +63,12 @@ class PackSegInputs(BaseTransform):
img = results['img']
if len(img.shape) < 3:
img = np.expand_dims(img, -1)
img = np.ascontiguousarray(img.transpose(2, 0, 1))
packed_results['inputs'] = to_tensor(img)
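# Transpose to channel-first and make sure the packed tensor ends up
# memory-contiguous in either branch below.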
if not img.flags.c_contiguous:
img = to_tensor(np.ascontiguousarray(img.transpose(2, 0, 1)))
else:
img = img.transpose(2, 0, 1)
img = to_tensor(img).contiguous()
packed_results['inputs'] = img
data_sample = SegDataSample()
if 'gt_seg_map' in results:

View File

@ -1,6 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Dict
from typing import Dict, Optional, Union
import mmcv
import mmengine.fileio as fileio
@ -56,14 +56,14 @@ class LoadAnnotations(MMCV_LoadAnnotations):
Defaults to 'pillow'.
backend_args (dict): Arguments to instantiate a file backend.
See https://mmengine.readthedocs.io/en/latest/api/fileio.htm
for details. Defaults to ``dict(backend='local')``
for details. Defaults to None.
Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
"""
def __init__(
self,
reduce_zero_label=None,
backend_args=dict(backend='local'),
backend_args=None,
imdecode_backend='pillow',
) -> None:
super().__init__(
@ -203,23 +203,21 @@ class LoadBiomedicalImageFromFile(BaseTransform):
to_float32 (bool): Whether to convert the loaded image to a float32
numpy array. If set to False, the loaded image is an float64 array.
Defaults to True.
backend_args (dict): Arguments to instantiate a file backend.
backend_args (dict, Optional): Arguments to instantiate a file backend.
See https://mmengine.readthedocs.io/en/latest/api/fileio.htm
for details. Defaults to ``dict(backend='local')``
for details. Defaults to None.
Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
"""
def __init__(
self,
decode_backend: str = 'nifti',
to_xyz: bool = False,
to_float32: bool = True,
backend_args: dict = dict(backend='local')
) -> None:
def __init__(self,
decode_backend: str = 'nifti',
to_xyz: bool = False,
to_float32: bool = True,
backend_args: Optional[dict] = None) -> None:
self.decode_backend = decode_backend
self.to_xyz = to_xyz
self.to_float32 = to_float32
self.backend_args = backend_args.copy()
self.backend_args = backend_args.copy() if backend_args else None
def transform(self, results: Dict) -> Dict:
"""Functions to load image.
@ -295,24 +293,22 @@ class LoadBiomedicalAnnotation(BaseTransform):
to_float32 (bool): Whether to convert the loaded seg map to a float32
numpy array. If set to False, the loaded image is an float64 array.
Defaults to True.
backend_args (dict): Arguments to instantiate a file backend.
backend_args (dict, Optional): Arguments to instantiate a file backend.
See :class:`mmengine.fileio` for details.
Defaults to ``dict(backend='local')``.
Defaults to None.
Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
"""
def __init__(
self,
decode_backend: str = 'nifti',
to_xyz: bool = False,
to_float32: bool = True,
backend_args: dict = dict(backend='local')
) -> None:
def __init__(self,
decode_backend: str = 'nifti',
to_xyz: bool = False,
to_float32: bool = True,
backend_args: Optional[dict] = None) -> None:
super().__init__()
self.decode_backend = decode_backend
self.to_xyz = to_xyz
self.to_float32 = to_float32
self.backend_args = backend_args.copy()
self.backend_args = backend_args.copy() if backend_args else None
def transform(self, results: Dict) -> Dict:
"""Functions to load image.
@ -384,23 +380,21 @@ class LoadBiomedicalData(BaseTransform):
backend is 'nifti'. Defaults to 'nifti'.
to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z.
Defaults to False.
backend_args (dict): Arguments to instantiate a file backend.
backend_args (dict, Optional): Arguments to instantiate a file backend.
See https://mmengine.readthedocs.io/en/latest/api/fileio.htm
for details. Defaults to ``dict(backend='local')``
for details. Defaults to None.
Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
"""
def __init__(
self,
with_seg=False,
decode_backend: str = 'numpy',
to_xyz: bool = False,
backend_args: dict = dict(backend='local')
) -> None: # noqa
def __init__(self,
with_seg=False,
decode_backend: str = 'numpy',
to_xyz: bool = False,
backend_args: Optional[dict] = None) -> None: # noqa
self.with_seg = with_seg
self.decode_backend = decode_backend
self.to_xyz = to_xyz
self.backend_args = backend_args.copy()
self.backend_args = backend_args.copy() if backend_args else None
def transform(self, results: Dict) -> Dict:
"""Functions to load image.
@ -443,3 +437,59 @@ class LoadBiomedicalData(BaseTransform):
f'to_xyz={self.to_xyz}, '
f'backend_args={self.backend_args})')
return repr_str
@TRANSFORMS.register_module()
class InferencerLoader(BaseTransform):
"""Load an image from ``results['img']``.
Similar with :obj:`LoadImageFromFile`, but the image has been loaded as
:obj:`np.ndarray` in ``results['img']``. Can be used when loading image
from webcam.
Required Keys:
- img
Modified Keys:
- img
- img_path
- img_shape
- ori_shape
Args:
to_float32 (bool): Whether to convert the loaded image to a float32
numpy array. If set to False, the loaded image is an uint8 array.
Defaults to False.
"""
def __init__(self, **kwargs) -> None:
super().__init__()
self.from_file = TRANSFORMS.build(
dict(type='LoadImageFromFile', **kwargs))
self.from_ndarray = TRANSFORMS.build(
dict(type='LoadImageFromNDArray', **kwargs))
def transform(self, single_input: Union[str, np.ndarray, dict]) -> dict:
"""Transform function to add image meta information.
Args:
results (dict): Result dict with Webcam read image in
``results['img']``.
Returns:
dict: The dict contains loaded image and meta information.
"""
if isinstance(single_input, str):
inputs = dict(img_path=single_input)
elif isinstance(single_input, np.ndarray):
inputs = dict(img=single_input)
elif isinstance(single_input, dict):
inputs = single_input
else:
raise NotImplementedError
if 'img' in inputs:
return self.from_ndarray(inputs)
return self.from_file(inputs)
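With `backend_args` now defaulting to `None`, non-local file backends become opt-in. A rough sketch of passing them through a pipeline (the backend choice is only illustrative):

```python
# Hypothetical pipeline reading images and annotations through a remote backend.
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=dict(backend='petrel')),
    dict(type='LoadAnnotations', backend_args=dict(backend='petrel')),
    dict(type='PackSegInputs'),
]
```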

View File

@ -1,7 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import warnings
from typing import Sequence
from typing import Optional, Sequence
import mmcv
import mmengine.fileio as fileio
@ -30,9 +30,9 @@ class SegVisualizationHook(Hook):
interval (int): The interval of visualization. Defaults to 50.
show (bool): Whether to display the drawn image. Default to False.
wait_time (float): The interval of show (s). Defaults to 0.
backend_args (dict): Arguments to instantiate a file backend.
backend_args (dict, Optional): Arguments to instantiate a file backend.
See https://mmengine.readthedocs.io/en/latest/api/fileio.htm
for details. Defaults to ``dict(backend='local')``
for details. Defaults to None.
Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required.
"""
@ -41,7 +41,7 @@ class SegVisualizationHook(Hook):
interval: int = 50,
show: bool = False,
wait_time: float = 0.,
backend_args: dict = dict(backend='local')):
backend_args: Optional[dict] = None):
self._visualizer: SegLocalVisualizer = \
SegLocalVisualizer.get_current_instance()
self.interval = interval
@ -55,7 +55,7 @@ class SegVisualizationHook(Hook):
'needs to be excluded.')
self.wait_time = wait_time
self.backend_args = backend_args.copy()
self.backend_args = backend_args.copy() if backend_args else None
self.draw = draw
if not self.draw:
warnings.warn('The draw is False, it means that the '
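For reference, a sketch of how this hook is usually enabled in a config (key names follow the default 1.x runtime; the values are illustrative):

```python
default_hooks = dict(
    visualization=dict(
        type='SegVisualizationHook',
        draw=True,      # actually draw and store the results
        interval=1))    # visualize every validation/test sample
```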

View File

@ -68,6 +68,7 @@ class CascadeEncoderDecoder(EncoderDecoder):
self.decode_head.append(MODELS.build(decode_head[i]))
self.align_corners = self.decode_head[-1].align_corners
self.num_classes = self.decode_head[-1].num_classes
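# As with align_corners and num_classes above, expose out_channels from the
# last decode head of the cascade.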
self.out_channels = self.decode_head[-1].out_channels
def encode_decode(self, inputs: Tensor,
batch_img_metas: List[dict]) -> Tensor:

View File

@ -1,13 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .registry import (DATA_SAMPLERS, DATASETS, HOOKS, LOOPS, METRICS,
MODEL_WRAPPERS, MODELS, OPTIM_WRAPPER_CONSTRUCTORS,
OPTIMIZERS, PARAM_SCHEDULERS, RUNNER_CONSTRUCTORS,
RUNNERS, TASK_UTILS, TRANSFORMS, VISBACKENDS,
VISUALIZERS, WEIGHT_INITIALIZERS)
from .registry import (DATA_SAMPLERS, DATASETS, EVALUATOR, HOOKS, INFERENCERS,
LOG_PROCESSORS, LOOPS, METRICS, MODEL_WRAPPERS, MODELS,
OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, OPTIMIZERS,
PARAM_SCHEDULERS, RUNNER_CONSTRUCTORS, RUNNERS,
TASK_UTILS, TRANSFORMS, VISBACKENDS, VISUALIZERS,
WEIGHT_INITIALIZERS)
__all__ = [
'RUNNERS', 'RUNNER_CONSTRUCTORS', 'HOOKS', 'DATASETS', 'DATA_SAMPLERS',
'TRANSFORMS', 'MODELS', 'WEIGHT_INITIALIZERS', 'OPTIMIZERS',
'OPTIM_WRAPPER_CONSTRUCTORS', 'TASK_UTILS', 'PARAM_SCHEDULERS', 'METRICS',
'MODEL_WRAPPERS', 'LOOPS', 'VISBACKENDS', 'VISUALIZERS'
'HOOKS', 'DATASETS', 'DATA_SAMPLERS', 'TRANSFORMS', 'MODELS',
'WEIGHT_INITIALIZERS', 'OPTIMIZERS', 'OPTIM_WRAPPER_CONSTRUCTORS',
'TASK_UTILS', 'PARAM_SCHEDULERS', 'METRICS', 'MODEL_WRAPPERS',
'VISBACKENDS', 'VISUALIZERS', 'RUNNERS', 'RUNNER_CONSTRUCTORS', 'LOOPS',
'EVALUATOR', 'LOG_PROCESSORS', 'OPTIM_WRAPPERS', 'INFERENCERS'
]

View File

@ -10,6 +10,7 @@ from mmengine.registry import DATA_SAMPLERS as MMENGINE_DATA_SAMPLERS
from mmengine.registry import DATASETS as MMENGINE_DATASETS
from mmengine.registry import EVALUATOR as MMENGINE_EVALUATOR
from mmengine.registry import HOOKS as MMENGINE_HOOKS
from mmengine.registry import INFERENCERS as MMENGINE_INFERENCERS
from mmengine.registry import LOG_PROCESSORS as MMENGINE_LOG_PROCESSORS
from mmengine.registry import LOOPS as MMENGINE_LOOPS
from mmengine.registry import METRICS as MMENGINE_METRICS
@ -39,45 +40,82 @@ RUNNER_CONSTRUCTORS = Registry(
# manage all kinds of loops like `EpochBasedTrainLoop`
LOOPS = Registry('loop', parent=MMENGINE_LOOPS)
# manage all kinds of hooks like `CheckpointHook`
HOOKS = Registry('hook', parent=MMENGINE_HOOKS)
HOOKS = Registry(
'hook', parent=MMENGINE_HOOKS, locations=['mmseg.engine.hooks'])
# manage data-related modules
DATASETS = Registry('dataset', parent=MMENGINE_DATASETS)
DATA_SAMPLERS = Registry('data sampler', parent=MMENGINE_DATA_SAMPLERS)
TRANSFORMS = Registry('transform', parent=MMENGINE_TRANSFORMS)
DATASETS = Registry(
'dataset', parent=MMENGINE_DATASETS, locations=['mmseg.datasets'])
DATA_SAMPLERS = Registry(
'data sampler',
parent=MMENGINE_DATA_SAMPLERS,
locations=['mmseg.datasets.samplers'])
TRANSFORMS = Registry(
'transform',
parent=MMENGINE_TRANSFORMS,
locations=['mmseg.datasets.transforms'])
# manage all kinds of modules inheriting `nn.Module`
MODELS = Registry('model', parent=MMENGINE_MODELS)
MODELS = Registry('model', parent=MMENGINE_MODELS, locations=['mmseg.models'])
# manage all kinds of model wrappers like 'MMDistributedDataParallel'
MODEL_WRAPPERS = Registry('model_wrapper', parent=MMENGINE_MODEL_WRAPPERS)
MODEL_WRAPPERS = Registry(
'model_wrapper',
parent=MMENGINE_MODEL_WRAPPERS,
locations=['mmseg.models'])
# manage all kinds of weight initialization modules like `Uniform`
WEIGHT_INITIALIZERS = Registry(
'weight initializer', parent=MMENGINE_WEIGHT_INITIALIZERS)
'weight initializer',
parent=MMENGINE_WEIGHT_INITIALIZERS,
locations=['mmseg.models'])
# manage all kinds of optimizers like `SGD` and `Adam`
OPTIMIZERS = Registry('optimizer', parent=MMENGINE_OPTIMIZERS)
OPTIMIZERS = Registry(
'optimizer',
parent=MMENGINE_OPTIMIZERS,
locations=['mmseg.engine.optimizers'])
# manage optimizer wrapper
OPTIM_WRAPPERS = Registry('optim_wrapper', parent=MMENGINE_OPTIM_WRAPPERS)
OPTIM_WRAPPERS = Registry(
'optim_wrapper',
parent=MMENGINE_OPTIM_WRAPPERS,
locations=['mmseg.engine.optimizers'])
# manage constructors that customize the optimization hyperparameters.
OPTIM_WRAPPER_CONSTRUCTORS = Registry(
'optimizer wrapper constructor',
parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS)
parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS,
locations=['mmseg.engine.optimizers'])
# manage all kinds of parameter schedulers like `MultiStepLR`
PARAM_SCHEDULERS = Registry(
'parameter scheduler', parent=MMENGINE_PARAM_SCHEDULERS)
'parameter scheduler',
parent=MMENGINE_PARAM_SCHEDULERS,
locations=['mmseg.engine.schedulers'])
# manage all kinds of metrics
METRICS = Registry('metric', parent=MMENGINE_METRICS)
METRICS = Registry(
'metric', parent=MMENGINE_METRICS, locations=['mmseg.evaluation'])
# manage evaluator
EVALUATOR = Registry('evaluator', parent=MMENGINE_EVALUATOR)
EVALUATOR = Registry(
'evaluator', parent=MMENGINE_EVALUATOR, locations=['mmseg.evaluation'])
# manage task-specific modules like ohem pixel sampler
TASK_UTILS = Registry('task util', parent=MMENGINE_TASK_UTILS)
TASK_UTILS = Registry(
'task util', parent=MMENGINE_TASK_UTILS, locations=['mmseg.models'])
# manage visualizer
VISUALIZERS = Registry('visualizer', parent=MMENGINE_VISUALIZERS)
VISUALIZERS = Registry(
'visualizer',
parent=MMENGINE_VISUALIZERS,
locations=['mmseg.visualization'])
# manage visualizer backend
VISBACKENDS = Registry('vis_backend', parent=MMENGINE_VISBACKENDS)
VISBACKENDS = Registry(
'vis_backend',
parent=MMENGINE_VISBACKENDS,
locations=['mmseg.visualization'])
# manage logprocessor
LOG_PROCESSORS = Registry('log_processor', parent=MMENGINE_LOG_PROCESSORS)
LOG_PROCESSORS = Registry(
'log_processor',
parent=MMENGINE_LOG_PROCESSORS,
locations=['mmseg.visualization'])
# manage inferencer
INFERENCERS = Registry('inferencer', parent=MMENGINE_INFERENCERS)
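The `locations` argument added to the registries above lets each registry lazily import the modules that register components into it. As a brief, hedged illustration (not part of this commit) of how a downstream component would hook into these registries, the sketch below registers a hypothetical `DummyTimerHook` with the `HOOKS` registry; the class name and log message are assumptions made for the example only.

```python
# Illustrative sketch only: a hypothetical hook registered with the HOOKS
# registry defined above. The class name and message are made up.
from mmengine.hooks import Hook

from mmseg.registry import HOOKS


@HOOKS.register_module()
class DummyTimerHook(Hook):
    """Toy hook that logs a short message after every training iteration."""

    def after_train_iter(self, runner, batch_idx, data_batch=None, outputs=None) -> None:
        # runner.logger is the standard MMEngine runner logger.
        runner.logger.info(f'DummyTimerHook: finished train iteration {batch_idx}')
```

Once importable (for instance via `custom_imports` in a config), such a hook could then be enabled with `custom_hooks = [dict(type='DummyTimerHook')]`.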

View File

@ -1,6 +1,6 @@
# Copyright (c) Open-MMLab. All rights reserved.
__version__ = '1.0.0rc5'
__version__ = '1.0.0rc6'
def parse_version_info(version_str):

View File

@ -1,5 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional
import mmcv
import numpy as np
@ -24,6 +24,17 @@ class SegLocalVisualizer(Visualizer):
Defaults to None.
save_dir (str, optional): Save file dir for all storage backends.
If it is None, the backend storage will not save any data.
classes (list, optional): Input classes for result rendering. As the
prediction of a segmentation model is a segmentation map with label
indices, `classes` is a list whose items correspond to the label
indices. If classes is not defined, the visualizer will use the
`cityscapes` classes by default. Defaults to None.
palette (list, optional): Input palette for result rendering, which is
a list of colors corresponding to the classes. Defaults to None.
dataset_name (str, optional): `Dataset name or alias <https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/class_names.py#L302-L317>`_
The visualizer will use the meta information of the dataset, i.e. classes
and palette, but the `classes` and `palette` arguments have higher
priority. Defaults to None.
alpha (int, float): The transparency of segmentation mask.
Defaults to 0.8.
@ -49,43 +60,40 @@ class SegLocalVisualizer(Visualizer):
>>> seg_local_visualizer.add_datasample(
... 'visualizer_example', image,
... gt_seg_data_sample, show=True)
"""
""" # noqa
def __init__(self,
name: str = 'visualizer',
image: Optional[np.ndarray] = None,
vis_backends: Optional[Dict] = None,
save_dir: Optional[str] = None,
palette: Optional[Union[str, List]] = None,
classes: Optional[Union[str, List]] = None,
classes: Optional[List] = None,
palette: Optional[List] = None,
dataset_name: Optional[str] = None,
alpha: float = 0.8,
**kwargs):
super().__init__(name, image, vis_backends, save_dir, **kwargs)
self.alpha: float = alpha
# Set default value. When calling
# `SegLocalVisualizer().dataset_meta=xxx`,
# it will override the default value.
if dataset_name is None:
dataset_name = 'cityscapes'
classes = classes if classes else get_classes(dataset_name)
palette = palette if palette else get_palette(dataset_name)
assert len(classes) == len(
palette), 'The length of classes should be equal to palette'
self.dataset_meta: dict = {'classes': classes, 'palette': palette}
self.set_dataset_meta(classes, palette, dataset_name)
def _draw_sem_seg(self, image: np.ndarray, sem_seg: PixelData,
classes: Optional[Tuple[str]],
palette: Optional[List[List[int]]]) -> np.ndarray:
classes: Optional[List],
palette: Optional[List]) -> np.ndarray:
"""Draw semantic seg of GT or prediction.
Args:
image (np.ndarray): The image to draw.
sem_seg (:obj:`PixelData`): Data structure for
pixel-level annotations or predictions.
classes (Tuple[str], optional): Category information.
palette (List[List[int]], optional): The palette of
segmentation map.
sem_seg (:obj:`PixelData`): Data structure for pixel-level
annotations or predictions.
classes (list, optional): Input classes for result rendering. As
the prediction of a segmentation model is a segmentation map
with label indices, `classes` is a list whose items correspond
to the label indices. If classes is not defined, the
visualizer will use the `cityscapes` classes by default.
Defaults to None.
palette (list, optional): Input palette for result rendering, which
is a list of colors corresponding to the classes.
Defaults to None.
Returns:
np.ndarray: the drawn image which channel is RGB.
@ -109,6 +117,38 @@ class SegLocalVisualizer(Visualizer):
return self.get_image()
def set_dataset_meta(self,
classes: Optional[List] = None,
palette: Optional[List] = None,
dataset_name: Optional[str] = None) -> None:
"""Set meta information to visualizer.
Args:
classes (list, optional): Input classes for result rendering. As
the prediction of a segmentation model is a segmentation map
with label indices, `classes` is a list whose items correspond
to the label indices. If classes is not defined, the
visualizer will use the `cityscapes` classes by default.
Defaults to None.
palette (list, optional): Input palette for result rendering, which
is a list of colors corresponding to the classes.
Defaults to None.
dataset_name (str, optional): `Dataset name or alias <https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/class_names.py#L302-L317>`_
The visualizer will use the meta information of the dataset, i.e.
classes and palette, but the `classes` and `palette` arguments
have higher priority. Defaults to None.
""" # noqa
# Set default value. When calling
# `SegLocalVisualizer().dataset_meta=xxx`,
# it will override the default value.
if dataset_name is None:
dataset_name = 'cityscapes'
classes = classes if classes else get_classes(dataset_name)
palette = palette if palette else get_palette(dataset_name)
assert len(classes) == len(
palette), 'The length of classes should be equal to palette'
self.dataset_meta: dict = {'classes': classes, 'palette': palette}
@master_only
def add_datasample(
self,
@ -186,6 +226,6 @@ class SegLocalVisualizer(Visualizer):
self.show(drawn_img, win_name=name, wait_time=wait_time)
if out_file is not None:
mmcv.imwrite(drawn_img, out_file)
mmcv.imwrite(mmcv.bgr2rgb(drawn_img), out_file)
else:
self.add_image(name, drawn_img, step)
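To make the new `set_dataset_meta` interface above concrete, here is a minimal, hedged usage sketch; the visualizer name, save directory, class names, palette colors and the `'ade20k'` alias are illustrative assumptions, not values taken from this change.

```python
# Illustrative usage of SegLocalVisualizer.set_dataset_meta; all concrete
# values below (name, save_dir, classes, palette, alias) are example choices.
from mmseg.visualization import SegLocalVisualizer

visualizer = SegLocalVisualizer(name='demo_visualizer', save_dir='work_dirs/vis')

# Option 1: reuse the classes/palette of a registered dataset alias.
visualizer.set_dataset_meta(dataset_name='ade20k')

# Option 2: pass explicit classes and palette; their lengths must match.
visualizer.set_dataset_meta(
    classes=['sky', 'building'],
    palette=[[128, 128, 255], [70, 70, 70]])
```

Because `classes` and `palette` have higher priority than `dataset_name`, passing them explicitly overrides the defaults taken from the dataset alias.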

View File

@ -1,9 +1,19 @@
# Projects
Implementing new models and features into OpenMMLab's algorithm libraries could be troublesome due to the rigorous requirements on code quality, which could hinder the fast iteration of SOTA models and might discourage our members from sharing their latest outcomes here.
The OpenMMLab ecosystem can only grow through the contributions of the community.
Everyone is welcome to post their implementation of any great ideas in this folder! If you wish to start your own project, please go through the [example project](example_project/) for the best practice. For common questions about projects, please read our [faq](faq.md).
And that's why we have this `Projects/` folder now, where some experimental features, frameworks and models are placed, only needed to satisfy the minimum requirement on the code quality, and can be used as standalone libraries. Users are welcome to use them if they [use MMSegmentation from source](https://mmsegmentation.readthedocs.io/en/dev-1.x/get_started.html#best-practices).
## External Projects
Everyone is welcome to post their implementation of any great ideas in this folder! If you wish to start your own project, please go through the [example project](example_project/) for the best practice.
There are also selected external projects released in the community that use MMSegmentation:
Note: The core maintainers of MMSegmentation only ensure the results are reproducible and the code quality meets its claim at the time each project was submitted, but they may not be responsible for future maintenance. The original authors take responsibility for maintaining their own projects.
- [SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation](https://github.com/visual-attention-network/segnext)
- [Vision Transformer Adapter for Dense Predictions](https://github.com/czczup/ViT-Adapter)
- [UniFormer: Unifying Convolution and Self-attention for Visual Recognition](https://github.com/Sense-X/UniFormer)
- [Multi-Scale High-Resolution Vision Transformer for Semantic Segmentation](https://github.com/facebookresearch/HRViT)
- [ViTAE: Vision Transformer Advanced by Exploring Intrinsic Inductive Bias](https://github.com/ViTAE-Transformer/ViTAE-Transformer)
- [DAFormer: Improving Network Architectures and Training Strategies for Domain-Adaptive Semantic Segmentation](https://github.com/lhoyer/DAFormer)
- [MPViT : Multi-Path Vision Transformer for Dense Prediction](https://github.com/youngwanLEE/MPViT)
- [TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation](https://github.com/hustvl/TopFormer)
Note: These projects are supported and maintained by their own contributors. The core maintainers of MMSegmentation only ensure the results are reproducible and the code quality meets its claim at the time each project was submitted, but they may not be responsible for future maintenance.

View File

@ -1,20 +1,26 @@
# Dummy ResNet Wrapper
This is an example README for community `projects/`. We have provided detailed explanations for each field in the form of html comments, which are visible when you read the source of this README file. If you wish to submit your project to our main repository, then all the fields in this README are mandatory for others to understand what you have achieved in this implementation. For more details, read our [contribution guide](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/.github/CONTRIBUTING.md) or approach us in [Discussions](https://github.com/open-mmlab/mmsegmentation/discussions).
> A README.md template for releasing a project.
>
> All the fields in this README are **mandatory** for others to understand what you have achieved in this implementation.
> Please read our [Projects FAQ](../faq.md) if you still feel unclear about the requirements, or raise an [issue](https://github.com/open-mmlab/mmsegmentation/issues) to us!
## Description
<!-- Share any information you would like others to know. For example:
> Share any information you would like others to know. For example:
>
> Author: @xxx.
>
> This is an implementation of \[XXX\].
Author: @xxx.
This is an implementation of \[XXX\]. -->
Author: @xxx.
This project implements a dummy ResNet wrapper, which literally does nothing new but prints "hello world" during initialization.
## Usage
<!-- For a typical model, this section should contain the commands for training and testing. You are also suggested to dump your environment specification to env.yml by `conda env export > env.yml`. -->
> For a typical model, this section should contain the commands for training and testing.
> You are also suggested to dump your environment specification to env.yml by `conda env export > env.yml`.
### Prerequisites
@ -47,9 +53,8 @@ mim train mmsegmentation configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.p
mim test mmsegmentation configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.py --work-dir work_dirs/dummy_resnet --checkpoint ${CHECKPOINT_PATH}
```
<!-- List the results as usually done in other model's README. [Example](https://github.com/open-mmlab/mmsegmentation/tree/dev-1.x/configs/fcn#results-and-models)
You should claim whether this is based on the pre-trained weights, which are converted from the official release; or it's a reproduced result obtained from retraining the model in this project. -->
> List the results as usually done in other models' README. \[Example\](https://github.com/open-mmlab/mmsegmentation/tree/dev-1.x/configs/fcn#results-and-models)
> You should claim whether this is based on the pre-trained weights, which are converted from the official release, or it's a reproduced result obtained from retraining the model in this project.
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@ -57,7 +62,7 @@ You should claim whether this is based on the pre-trained weights, which are con
## Citation
<!-- You may remove this section if not applicable. -->
> You may remove this section if not applicable.
```bibtex
@misc{mmseg2020,
@ -72,58 +77,58 @@ You should claim whether this is based on the pre-trained weights, which are con
Here is a checklist illustrating a usual development workflow of a successful project, and also serves as an overview of this project's progress.
<!-- The PIC (person in charge) or contributors of this project should check all the items that they believe have been finished, which will further be verified by codebase maintainers via a PR.
> The PIC (person in charge) or contributors of this project should check all the items that they believe have been finished, which will further be verified by codebase maintainers via a PR.
OpenMMLab's maintainer will review the code to ensure the project's quality. Reaching the first milestone means that this project suffices the minimum requirement of being merged into 'projects/'. But this project is only eligible to become a part of the core package upon attaining the last milestone.
> OpenMMLab's maintainer will review the code to ensure the project's quality. Reaching the first milestone means that this project suffices the minimum requirement of being merged into 'projects/'. But this project is only eligible to become a part of the core package upon attaining the last milestone.
Note that keeping this section up-to-date is crucial not only for this project's developers but the entire community, since there might be some other contributors joining this project and deciding their starting point from this list. It also helps maintainers accurately estimate time and effort on further code polishing, if needed.
> Note that keeping this section up-to-date is crucial not only for this project's developers but the entire community, since there might be some other contributors joining this project and deciding their starting point from this list. It also helps maintainers accurately estimate time and effort on further code polishing, if needed.
A project does not necessarily have to be finished in a single PR, but it's essential for the project to at least reach the first milestone in its very first PR. -->
> A project does not necessarily have to be finished in a single PR, but it's essential for the project to at least reach the first milestone in its very first PR.
- [ ] Milestone 1: PR-ready, and acceptable to be one of the `projects/`.
- [ ] Finish the code
<!-- The code's design shall follow existing interfaces and convention. For example, each model component should be registered into `mmseg.registry.MODELS` and configurable via a config file. -->
> The code's design shall follow existing interfaces and conventions. For example, each model component should be registered into `mmseg.registry.MODELS` and configurable via a config file.
- [ ] Basic docstrings & proper citation
- [ ] Basic docstrings & proper citation
<!-- Each major object should contain a docstring, describing its functionality and arguments. If you have adapted the code from other open-source projects, don't forget to cite the source project in docstring and make sure your behavior is not against its license. Typically, we do not accept any code snippet under GPL license. [A Short Guide to Open Source Licenses](https://medium.com/nationwide-technology/a-short-guide-to-open-source-licenses-cf5b1c329edd) -->
> Each major object should contain a docstring, describing its functionality and arguments. If you have adapted the code from other open-source projects, don't forget to cite the source project in docstring and make sure your behavior is not against its license. Typically, we do not accept any code snippet under GPL license. [A Short Guide to Open Source Licenses](https://medium.com/nationwide-technology/a-short-guide-to-open-source-licenses-cf5b1c329edd)
- [ ] Test-time correctness
- [ ] Test-time correctness
<!-- If you are reproducing the result from a paper, make sure your model's inference-time performance matches that in the original paper. The weights usually could be obtained by simply renaming the keys in the official pre-trained weights. This test could be skipped though, if you are able to prove the training-time correctness and check the second milestone. -->
> If you are reproducing the result from a paper, make sure your model's inference-time performance matches that in the original paper. The weights usually could be obtained by simply renaming the keys in the official pre-trained weights. This test could be skipped though, if you are able to prove the training-time correctness and check the second milestone.
- [ ] A full README
- [ ] A full README
<!-- As this template does. -->
> As this template does.
- [ ] Milestone 2: Indicates a successful model implementation.
- [ ] Training-time correctness
<!-- If you are reproducing the result from a paper, checking this item means that you should have trained your model from scratch based on the original paper's specification and verified that the final result matches the report within a minor error range. -->
> If you are reproducing the result from a paper, checking this item means that you should have trained your model from scratch based on the original paper's specification and verified that the final result matches the report within a minor error range.
- [ ] Milestone 3: Good to be a part of our core package!
- [ ] Type hints and docstrings
<!-- Ideally *all* the methods should have [type hints](https://www.pythontutorial.net/python-basics/python-type-hints/) and [docstrings](https://google.github.io/styleguide/pyguide.html#381-docstrings). [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/io.py#L9) -->
> Ideally *all* the methods should have [type hints](https://www.pythontutorial.net/python-basics/python-type-hints/) and [docstrings](https://google.github.io/styleguide/pyguide.html#381-docstrings). [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/io.py#L9)
- [ ] Unit tests
- [ ] Unit tests
<!-- Unit tests for each module are required. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/tests/test_utils/test_io.py#L14) -->
> Unit tests for each module are required. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/tests/test_utils/test_io.py#L14)
- [ ] Code polishing
- [ ] Code polishing
<!-- Refactor your code according to reviewer's comment. -->
> Refactor your code according to reviewer's comment.
- [ ] Metafile.yml
- [ ] Metafile.yml
<!-- It will be parsed by MIM and Inferencer. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn.yml) -->
> It will be parsed by MIM and Inferencer. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn.yml)
- [ ] Move your modules into the core package following the codebase's file hierarchy structure.
<!-- In particular, you may have to refactor this README into a standard one. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/README.md) -->
> In particular, you may have to refactor this README into a standard one. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/README.md)
- [ ] Refactor your modules into the core package following the codebase's file hierarchy structure.

View File

@ -1,6 +1,6 @@
_base_ = ['../../../configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py']
_base_ = ['mmseg::fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py']
custom_imports = dict(imports=['projects.example_project.dummy'])
custom_imports = dict(imports=['dummy'])
crop_size = (512, 1024)
data_preprocessor = dict(size=crop_size)

projects/faq.md Normal file
View File

@ -0,0 +1,19 @@
Q1: Why set up `projects/` folder?
Implementing new models and features into OpenMMLab's algorithm libraries could be troublesome due to the rigorous requirements on code quality, which could hinder the fast iteration of SOTA models and might discourage our members from sharing their latest outcomes here. That is why we have this `projects/` folder, where experimental features, frameworks and models only need to satisfy the minimum code-quality requirements and can be used as standalone libraries. Users are welcome to use them if they [use MMSegmentation from source](https://mmsegmentation.readthedocs.io/en/dev-1.x/get_started.html#best-practices).
Q2: Why should there be a checklist for a project?
This checklist is crucial not only for the project's developers but also for the entire community, since other contributors might join the project and decide their starting point from this list. It also helps maintainers accurately estimate the time and effort needed for further code polishing.
Q3: What kind of PR will be merged?
Reaching the first milestone means that this project meets the minimum requirement for being merged into `projects/`. That is, the very first PR of a project must have all the items in the first milestone checked. We do not have any extra requirements on the project's following PRs, so they can be a minor bug fix or update, and do not have to achieve one milestone at once. But keep in mind that this project is only eligible to become a part of the core package upon attaining the last milestone.
Q4: Compared to other models in the core packages, why do the model implementations in projects have different training/testing commands?
Projects are organized independently from the core package, and therefore their modules cannot be directly imported by train.py and test.py. Each model implementation in projects should either use `mim` for training/testing as suggested in the example project or provide a custom train.py/test.py.
Q5: How to debug a project with a debugger?
A debugger makes our lives easier, but using one becomes a bit tricky if we have to train/test a model via `mim`. The way to circumvent this is to take advantage of relative (dotted) paths to import these modules. Assuming that we are developing a project X whose core modules are placed under `projects/X/modules`, simply adding `custom_imports = dict(imports='projects.X.modules')` to the config allows us to debug from the usual entrypoints (e.g. `tools/train.py`) in the root directory of the algorithm library, as sketched below. Just don't forget to remove the 'projects.X' prefix before publishing the project.
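As a concrete, hedged sketch of the setup described in Q5 (the project name `X`, the module path `projects/X/modules`, the config filename and the chosen base config are assumptions for illustration only):

```python
# Hypothetical config, e.g. projects/X/configs/fcn_dummy_debug.py, used while
# debugging from the repository root via tools/train.py instead of `mim`.
_base_ = ['mmseg::fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py']

# Import the project's modules via their dotted path from the repo root so the
# registries can find them. Drop the 'projects.X' prefix before publishing.
custom_imports = dict(
    imports=['projects.X.modules'], allow_failed_imports=False)
```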

View File

@ -1,12 +1,10 @@
# HieraSeg
Support `Deep Hierarchical Semantic Segmentation` interface on `cityscapes`
# HSSN
## Description
Author: AI-Tianlong
This project implements `HieraSeg` inference in the `cityscapes` dataset
This project implements `Deep Hierarchical Semantic Segmentation` inference on the `cityscapes` dataset.
## Usage
@ -14,17 +12,17 @@ This project implements `HieraSeg` inference in the `cityscapes` dataset
- Python 3.8
- PyTorch 1.6 or higher
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) v1.0.0rc3
- mmcv v2.0.0rc3
- mmengine
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) v1.0.0rc5
- mmcv v2.0.0rc4
- mmengine >=0.4.0
### Dataset preparing
preparing `cityscapes` dataset like this [structure](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets)
Prepare the `cityscapes` dataset following this [Dataset Preparation Guide](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets)
### Testing commands
please put [`hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth`](https://download.openmmlab.com/mmsegmentation/v0.5/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth) to `mmsegmentation/checkpoints`
Please put [`hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth`](https://download.openmmlab.com/mmsegmentation/v0.5/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth) to `mmsegmentation/checkpoints`
#### Multi-GPUs Test
@ -36,7 +34,7 @@ bash tools/dist_test.sh [configs] [model weights] [number of gpu] --tta
#### Example
```shell
bash tools/dist_test.sh projects/HieraSeg_project/configs/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py checkpoints/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth 2 --tta
bash tools/dist_test.sh projects/hssn/configs/hssn/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py checkpoints/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth 2 --tta
```
## Results

Some files were not shown because too many files have changed in this diff.