diff --git a/.circleci/test.yml b/.circleci/test.yml
index f92775915..76f9f70d8 100644
--- a/.circleci/test.yml
+++ b/.circleci/test.yml
@@ -49,32 +49,25 @@ jobs:
- run:
name: Configure Python & pip
command: |
- python -m pip install --upgrade pip
- python -m pip install wheel
+ pip install --upgrade pip
+ pip install wheel
- run:
name: Install PyTorch
command: |
python -V
- python -m pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
+ pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
- run:
name: Install mmseg dependencies
command: |
- python -m pip install git+https://github.com/open-mmlab/mmengine.git@main
- python -m pip install -U openmim 'importlib-metadata<2'
- python -m mim install 'mmcv>=2.0.0rc1'
- python -m pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
- python -m pip install -r requirements/tests.txt -r requirements/optional.txt
+ pip install git+https://github.com/open-mmlab/mmengine.git@main
+ pip install -U openmim
+ mim install 'mmcv>=2.0.0rc1'
+            pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ pip install -r requirements/tests.txt -r requirements/optional.txt
- run:
name: Build and install
command: |
- python -m pip install -e .
- - run:
- name: Run unittests
- command: |
- python -m pip install timm
- python -m coverage run --branch --source mmseg -m pytest tests/
- python -m coverage xml
- python -m coverage report -m
+ pip install -e .
- run:
name: Skip timm unittests and generate coverage report
command: |
@@ -101,8 +94,8 @@ jobs:
# Cloning repos in VM since Docker doesn't have access to the private key
name: Clone Repos
command: |
- git clone -b main --depth 1 ssh://git@github.com/open-mmlab/mmengine.git /home/circleci/mmengine
- git clone -b dev-1.x --depth 1 ssh://git@github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification
+ git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine
+ git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification
- run:
name: Build Docker image
command: |
@@ -112,19 +105,18 @@ jobs:
name: Install mmseg dependencies
command: |
docker exec mmseg pip install -e /mmengine
- docker exec mmseg pip install -U openmim 'importlib-metadata<2'
+ docker exec mmseg pip install -U openmim
docker exec mmseg mim install 'mmcv>=2.0.0rc1'
docker exec mmseg pip install -e /mmclassification
- docker exec mmseg python -m pip install -r requirements.txt
+ docker exec mmseg pip install -r requirements/tests.txt -r requirements/optional.txt
- run:
name: Build and install
command: |
docker exec mmseg pip install -e .
- run:
- name: Run unittests
+ name: Run unittests but skip timm unittests
command: |
- docker exec mmseg python -m pip install timm
- docker exec mmseg python -m pytest tests/
+ docker exec mmseg pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
workflows:
pr_stage_lint:
when: << pipeline.parameters.lint_only >>
@@ -159,8 +151,8 @@ workflows:
- lint
- build_cpu:
name: maximum_version_cpu
- torch: 1.9.0
- torchvision: 0.10.0
+ torch: 1.12.1
+ torchvision: 0.13.1
python: 3.9.0
requires:
- minimum_version_cpu
diff --git a/.dev/check_urls.py b/.dev/check_urls.py
index 42b64745d..58a1354ba 100644
--- a/.dev/check_urls.py
+++ b/.dev/check_urls.py
@@ -56,8 +56,7 @@ def main():
for model_name, yml_path in yml_list:
# Default yaml loader unsafe.
- model_infos = yml.load(
- open(yml_path, 'r'), Loader=yml.CLoader)['Models']
+ model_infos = yml.load(open(yml_path), Loader=yml.CLoader)['Models']
for model_info in model_infos:
config_name = model_info['Name']
checkpoint_url = model_info['Weights']
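Note: `yml.CLoader` above is the C-accelerated counterpart of the *full* loader, so it is just as unsafe as the default loader the comment warns about; it is picked for speed, not safety. If safety is the goal, a minimal sketch (assuming libyaml may or may not be available) would be:

```python
import yaml

try:
    from yaml import CSafeLoader as SafeLoader  # C-accelerated safe loader
except ImportError:
    from yaml import SafeLoader  # pure-Python fallback

with open(yml_path) as f:
    model_infos = yaml.load(f, Loader=SafeLoader)['Models']
```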
diff --git a/.dev/gather_models.py b/.dev/gather_models.py
index 82f812752..fe6c3901c 100644
--- a/.dev/gather_models.py
+++ b/.dev/gather_models.py
@@ -35,7 +35,7 @@ def process_checkpoint(in_file, out_file):
# The hash code calculation and rename command differ on different system
# platform.
sha = calculate_file_sha256(out_file)
- final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])
+    # str.rstrip strips a character set, not a suffix; slice '.pth' off instead
+    final_file = out_file[:-len('.pth')] + f'-{sha[:8]}.pth'
os.rename(out_file, final_file)
# Remove prefix and suffix
@@ -54,7 +54,7 @@ def get_final_iter(config):
def get_final_results(log_json_path, iter_num):
result_dict = dict()
last_iter = 0
- with open(log_json_path, 'r') as f:
+ with open(log_json_path) as f:
for line in f.readlines():
log_line = json.loads(line)
if 'mode' not in log_line.keys():
@@ -125,7 +125,7 @@ def main():
exp_dir = osp.join(work_dir, config_name)
# check whether the exps is finished
final_iter = get_final_iter(used_config)
- final_model = 'iter_{}.pth'.format(final_iter)
+ final_model = f'iter_{final_iter}.pth'
model_path = osp.join(exp_dir, final_model)
# skip if the model is still training
diff --git a/.dev/generate_benchmark_train_script.py b/.dev/generate_benchmark_train_script.py
index 6e8a0ae31..4e6fa181f 100644
--- a/.dev/generate_benchmark_train_script.py
+++ b/.dev/generate_benchmark_train_script.py
@@ -74,7 +74,7 @@ def main():
commands.append('\n')
commands.append('\n')
- with open(args.txt_path, 'r') as f:
+ with open(args.txt_path) as f:
model_cfgs = f.readlines()
for i, cfg in enumerate(model_cfgs):
create_train_bash_info(commands, cfg, script_name, '$PARTITION',
diff --git a/.dev/log_collector/log_collector.py b/.dev/log_collector/log_collector.py
index d0f408087..0c2ff6188 100644
--- a/.dev/log_collector/log_collector.py
+++ b/.dev/log_collector/log_collector.py
@@ -86,7 +86,7 @@ def main():
val_list = []
last_iter = 0
for log_name in log_list:
- with open(os.path.join(preceding_path, log_name), 'r') as f:
+ with open(os.path.join(preceding_path, log_name)) as f:
# ignore the info line
f.readline()
all_lines = f.readlines()
diff --git a/.dev/md2yml.py b/.dev/md2yml.py
index b6c9daf09..fc9c67e47 100755
--- a/.dev/md2yml.py
+++ b/.dev/md2yml.py
@@ -15,7 +15,7 @@ import sys
from lxml import etree
from mmengine.fileio import dump
-MMSEG_ROOT = osp.dirname(osp.dirname((osp.dirname(__file__))))
+MMSEG_ROOT = osp.dirname(osp.dirname(osp.dirname(__file__)))
COLLECTIONS = [
'ANN', 'APCNet', 'BiSeNetV1', 'BiSeNetV2', 'CCNet', 'CGNet', 'DANet',
@@ -42,7 +42,7 @@ def dump_yaml_and_check_difference(obj, filename, sort_keys=False):
str_dump = dump(obj, None, file_format='yaml', sort_keys=sort_keys)
if osp.isfile(filename):
file_exists = True
- with open(filename, 'r', encoding='utf-8') as f:
+ with open(filename, encoding='utf-8') as f:
str_orig = f.read()
else:
file_exists = False
@@ -97,7 +97,7 @@ def parse_md(md_file):
# should be set with head or neck of this config file.
is_backbone = None
- with open(md_file, 'r', encoding='UTF-8') as md:
+ with open(md_file, encoding='UTF-8') as md:
lines = md.readlines()
i = 0
current_dataset = ''
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index fb862298c..000000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,261 +0,0 @@
-name: build
-
-on:
- push:
- paths-ignore:
- - 'demo/**'
- - '.dev/**'
- - 'docker/**'
- - 'tools/**'
- - '**.md'
-
- pull_request:
- paths-ignore:
- - 'demo/**'
- - '.dev/**'
- - 'docker/**'
- - 'tools/**'
- - 'docs/**'
- - '**.md'
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
- cancel-in-progress: true
-
-jobs:
- build_cpu:
- runs-on: ubuntu-18.04
- strategy:
- matrix:
- python-version: [3.7]
- torch: [1.6.0, 1.7.0, 1.8.0, 1.9.0]
- include:
- - torch: 1.6.0
- torch_version: torch1.6
- torchvision: 0.7.0
- - torch: 1.7.0
- torch_version: torch1.7
- torchvision: 0.8.1
- - torch: 1.8.0
- torch_version: torch1.8
- torchvision: 0.9.0
- - torch: 1.9.0
- torch_version: torch1.9
- torchvision: 0.10.0
- steps:
- - uses: actions/checkout@v2
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
- with:
- python-version: ${{ matrix.python-version }}
- - name: Upgrade pip
- run: pip install pip --upgrade
- - name: Install Pillow
- run: pip install Pillow==6.2.2
- if: ${{matrix.torchvision == '0.4.2'}}
- - name: Install PyTorch
- run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
- - name: Install MMEngine
- run: |
- pip install git+https://github.com/open-mmlab/mmengine.git
- python -c 'from mmengine.utils.dl_utils import collect_env;print(collect_env())'
- - name: Install MMCV
- run: |
- pip install -U openmim
- mim install 'mmcv>=2.0.0rc1'
- python -c 'import mmcv; print(mmcv.__version__)'
- - name: Install unittest dependencies
- run: |
- pip install -r requirements.txt
- - name: Build and install
- run: rm -rf .eggs && pip install -e .
- - name: Run unittests and generate coverage report
- run: |
- pip install timm
- coverage run --branch --source mmseg -m pytest tests/
- coverage xml
- coverage report -m
- - name: Skip timm unittests and generate coverage report
- run: |
- coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
- coverage xml
- coverage report -m
-
- build_cuda101:
- runs-on: ubuntu-18.04
- container:
- image: pytorch/pytorch:1.6.0-cuda10.1-cudnn7-devel
-
- strategy:
- matrix:
- python-version: [3.7]
- torch:
- [
- 1.6.0+cu101,
- 1.7.0+cu101,
- 1.8.0+cu101
- ]
- include:
- - torch: 1.6.0+cu101
- torch_version: torch1.6
- torchvision: 0.7.0+cu101
- - torch: 1.7.0+cu101
- torch_version: torch1.7
- torchvision: 0.8.1+cu101
- - torch: 1.8.0+cu101
- torch_version: torch1.8
- torchvision: 0.9.0+cu101
-
- steps:
- - uses: actions/checkout@v2
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
- with:
- python-version: ${{ matrix.python-version }}
- - name: Fetch GPG keys
- run: |
- apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
- apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
- - name: Install system dependencies
- run: |
- apt-get update && apt-get install -y libgl1-mesa-glx ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 python${{matrix.python-version}}-dev
- apt-get clean
- rm -rf /var/lib/apt/lists/*
- - name: Install Pillow
- run: python -m pip install Pillow==6.2.2
- if: ${{matrix.torchvision < 0.5}}
- - name: Install PyTorch
- run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
- - name: Install mmseg dependencies
- run: |
- python -V
- python -m pip install -U openmim
- python -m pip install git+https://github.com/open-mmlab/mmengine.git
- mim install 'mmcv>=2.0.0rc1'
- python -c 'import mmcv; print(mmcv.__version__)'
- python -m pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
- - name: Install unittest dependencies
- run: python -m pip install -r requirements/tests.txt -r requirements/optional.txt
- - name: Build and install
- run: |
- rm -rf .eggs
- python setup.py check -m -s
- TORCH_CUDA_ARCH_LIST=7.0 python -m pip install .
- - name: Run unittests and generate coverage report
- run: |
- python -m pip install timm
- coverage run --branch --source mmseg -m pytest tests/
- coverage xml
- coverage report -m
- - name: Skip timm unittests and generate coverage report
- run: |
- coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
- coverage xml
- coverage report -m
-
- build_cuda102:
- runs-on: ubuntu-18.04
- container:
- image: pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel
-
- strategy:
- matrix:
- python-version: [3.6, 3.7, 3.8, 3.9]
- torch: [1.9.0+cu102]
- include:
- - torch: 1.9.0+cu102
- torch_version: torch1.9
- torchvision: 0.10.0+cu102
-
- steps:
- - uses: actions/checkout@v2
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
- with:
- python-version: ${{ matrix.python-version }}
- - name: Fetch GPG keys
- run: |
- apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
- apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
- - name: Install system dependencies
- run: |
- apt-get update && apt-get install -y libgl1-mesa-glx ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6
- apt-get clean
- rm -rf /var/lib/apt/lists/*
- - name: Install Pillow
- run: python -m pip install Pillow==6.2.2
- if: ${{matrix.torchvision < 0.5}}
- - name: Install PyTorch
- run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
- - name: Install mmseg dependencies
- run: |
- python -V
- python -m pip install -U openmim
- python -m pip install git+https://github.com/open-mmlab/mmengine.git
- LC_ALL=C.UTF-8 LC_ALL=C.UTF-8 mim install 'mmcv>=2.0.0rc1'
- python -c 'import mmcv; print(mmcv.__version__)'
- python -m pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
- - name: Install unittest dependencies
- run: python -m pip install -r requirements/tests.txt -r requirements/optional.txt
- - name: Build and install
- run: |
- rm -rf .eggs
- python setup.py check -m -s
- TORCH_CUDA_ARCH_LIST=7.0 python -m pip install .
- - name: Run unittests and generate coverage report
- run: |
- python -m pip install timm
- coverage run --branch --source mmseg -m pytest tests/
- coverage xml
- coverage report -m
- - name: Upload coverage to Codecov
- uses: codecov/codecov-action@v2
- with:
- files: ./coverage.xml
- flags: unittests
- env_vars: OS,PYTHON
- name: codecov-umbrella
- fail_ci_if_error: false
-
- test_windows:
- runs-on: ${{ matrix.os }}
- strategy:
- matrix:
- os: [windows-2022]
- python: [3.8]
- platform: [cpu, cu111]
- steps:
- - uses: actions/checkout@v2
- - name: Set up Python ${{ matrix.python }}
- uses: actions/setup-python@v2
- with:
- python-version: ${{ matrix.python }}
- - name: Upgrade pip
- run: pip install pip --upgrade --user
- - name: Install OpenCV
- run: pip install opencv-python>=3
- - name: Install PyTorch
- # As a complement to Linux CI, we test on PyTorch LTS version
- run: pip install torch==1.8.2+${{ matrix.platform }} torchvision==0.9.2+${{ matrix.platform }} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
- - name: Install MMEngine
- run: |
- pip install git+https://github.com/open-mmlab/mmengine.git
- - name: Install MMCV
- run: |
- pip install -U openmim
- mim install 'mmcv>=2.0.0rc1'
- - name: Install MMClassification
- run: |
- pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
- - name: Install unittest dependencies
- run: pip install -r requirements/tests.txt -r requirements/optional.txt
- - name: Build and install
- run: pip install -e .
- - name: Run unittests
- run: |
- python -m pip install timm
- coverage run --branch --source mmseg -m pytest tests/ --ignore tests\test_models\test_forward.py tests\test_models\test_backbones\test_beit.py
- - name: Generate coverage report
- run: |
- coverage xml
- coverage report -m
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index de1cab255..644eaf651 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -28,4 +28,4 @@ jobs:
- name: Check docstring coverage
run: |
python -m pip install interrogate
- interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --exclude mmseg/ops --ignore-regex "__repr__" --fail-under 75 mmseg
+ interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 75 mmseg
diff --git a/.github/workflows/merge_stage_test.yml b/.github/workflows/merge_stage_test.yml
new file mode 100644
index 000000000..42a9dc0c4
--- /dev/null
+++ b/.github/workflows/merge_stage_test.yml
@@ -0,0 +1,203 @@
+name: merge_stage_test
+
+on:
+ push:
+ paths-ignore:
+ - 'README.md'
+ - 'README_zh-CN.md'
+ - 'docs/**'
+ - 'demo/**'
+ - '.dev_scripts/**'
+ - '.circleci/**'
+ branches:
+ - dev-1.x
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build_cpu_py:
+ runs-on: ubuntu-18.04
+ strategy:
+ matrix:
+ python-version: [3.6, 3.8, 3.9]
+ torch: [1.8.1]
+ include:
+ - torch: 1.8.1
+ torchvision: 0.9.1
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Upgrade pip
+ run: pip install pip --upgrade
+ - name: Install Pillow
+ run: pip install Pillow==6.2.2
+ if: ${{matrix.torchvision == '0.4.2'}}
+ - name: Install PyTorch
+ run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
+ - name: Install mmseg dependencies
+ run: |
+ python -V
+ pip install -U openmim
+ pip install git+https://github.com/open-mmlab/mmengine.git
+ mim install 'mmcv>=2.0.0rc1'
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ - name: Install unittest dependencies
+ run: pip install -r requirements/tests.txt -r requirements/optional.txt
+ - name: Build and install
+ run: rm -rf .eggs && pip install -e .
+ - name: Run unittests and generate coverage report
+ run: |
+ pip install timm
+ coverage run --branch --source mmseg -m pytest tests/
+ coverage xml
+ coverage report -m
+ build_cpu_pt:
+ runs-on: ubuntu-18.04
+ strategy:
+ matrix:
+ python-version: [3.7]
+ torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0]
+ include:
+ - torch: 1.6.0
+ torchvision: 0.7.0
+ - torch: 1.7.1
+ torchvision: 0.8.2
+ - torch: 1.8.1
+ torchvision: 0.9.1
+ - torch: 1.9.1
+ torchvision: 0.10.1
+ - torch: 1.10.1
+ torchvision: 0.11.2
+ - torch: 1.11.0
+ torchvision: 0.12.0
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Upgrade pip
+ run: pip install pip --upgrade
+ - name: Install Pillow
+ run: pip install Pillow==6.2.2
+ - name: Install PyTorch
+ run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
+ - name: Install mmseg dependencies
+ run: |
+ python -V
+ pip install -U openmim
+ pip install git+https://github.com/open-mmlab/mmengine.git
+ mim install 'mmcv>=2.0.0rc1'
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ - name: Install unittest dependencies
+ run: pip install -r requirements/tests.txt -r requirements/optional.txt
+ - name: Build and install
+ run: rm -rf .eggs && pip install -e .
+ - name: Run unittests and generate coverage report
+ # timm from v0.6.11 requires torch>=1.7
+ if: ${{matrix.torch >= '1.7.0'}}
+ run: |
+ pip install timm
+ coverage run --branch --source mmseg -m pytest tests/
+ coverage xml
+ coverage report -m
+ - name: Skip timm unittests and generate coverage report
+ run: |
+ coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
+ coverage xml
+ coverage report -m
+ # Only upload coverage report for python3.7 && pytorch1.8.1 without timm
+ - name: Upload coverage to Codecov
+ if: ${{matrix.torch == '1.8.1' && matrix.python-version == '3.7'}}
+ uses: codecov/codecov-action@v2
+ with:
+ files: ./coverage.xml
+ flags: unittests
+ env_vars: OS,PYTHON
+ name: codecov-umbrella
+ fail_ci_if_error: false
+
+ build_cu102:
+ runs-on: ubuntu-18.04
+ container:
+ image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
+ strategy:
+ matrix:
+ python-version: [3.7]
+ include:
+ - torch: 1.8.1
+ cuda: 10.2
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Upgrade pip
+ run: pip install pip --upgrade
+ - name: Fetch GPG keys
+ run: |
+ apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+ apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+ - name: Install Python-dev
+ run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
+ if: ${{matrix.python-version != 3.9}}
+ - name: Install system dependencies
+ run: |
+ apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6
+ - name: Install mmseg dependencies
+ run: |
+ python -V
+ pip install -U openmim
+ pip install git+https://github.com/open-mmlab/mmengine.git
+ mim install 'mmcv>=2.0.0rc1'
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ - name: Install unittest dependencies
+ run: pip install -r requirements/tests.txt -r requirements/optional.txt
+ - name: Build and install
+ run: |
+ python setup.py check -m -s
+ TORCH_CUDA_ARCH_LIST=7.0 pip install -e .
+ build_windows:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [windows-2022]
+ python: [3.7]
+ platform: [cpu, cu111]
+ steps:
+ - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python }}
+ - name: Upgrade pip
+ run: python -m pip install pip --upgrade
+ - name: Install OpenCV
+        run: pip install "opencv-python>=3"
+ - name: Install PyTorch
+ run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
+ - name: Install mmseg dependencies
+ run: |
+ python -V
+ pip install -U openmim
+ pip install git+https://github.com/open-mmlab/mmengine.git
+ mim install 'mmcv>=2.0.0rc1'
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ - name: Install unittest dependencies
+ run: pip install -r requirements/tests.txt -r requirements/optional.txt
+ - name: Build and install
+ run: pip install -e .
+ - name: Run unittests
+ run: |
+ pip install timm
+          coverage run --branch --source mmseg -m pytest tests/ --ignore tests\test_models\test_forward.py --ignore tests\test_models\test_backbones\test_beit.py
+ - name: Generate coverage report
+ run: |
+ coverage xml
+ coverage report -m
diff --git a/.github/workflows/pr_stage_test.yml b/.github/workflows/pr_stage_test.yml
new file mode 100644
index 000000000..30e50a962
--- /dev/null
+++ b/.github/workflows/pr_stage_test.yml
@@ -0,0 +1,140 @@
+name: pr_stage_test
+
+on:
+ pull_request:
+ paths-ignore:
+ - 'README.md'
+ - 'README_zh-CN.md'
+ - 'docs/**'
+ - 'demo/**'
+ - '.dev_scripts/**'
+ - '.circleci/**'
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build_cpu:
+ runs-on: ubuntu-18.04
+ strategy:
+ matrix:
+ python-version: [3.7]
+ include:
+ - torch: 1.8.1
+ torchvision: 0.9.1
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Upgrade pip
+ run: pip install pip --upgrade
+ - name: Install Pillow
+ run: pip install Pillow==6.2.2
+ if: ${{matrix.torchvision == '0.4.2'}}
+ - name: Install PyTorch
+ run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
+ - name: Install other dependencies
+ run: |
+ pip install -U openmim
+ pip install git+https://github.com/open-mmlab/mmengine.git
+ mim install 'mmcv>=2.0.0rc1'
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ - name: Install unittest dependencies
+ run: pip install -r requirements/tests.txt -r requirements/optional.txt
+ - name: Build and install
+ run: rm -rf .eggs && pip install -e .
+ - name: Run unittests and generate coverage report
+ run: |
+ coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
+ coverage xml
+ coverage report -m
+ # Upload coverage report for python3.7 && pytorch1.8.1 cpu without timm
+ - name: Upload coverage to Codecov
+ uses: codecov/codecov-action@v1.0.14
+ with:
+ file: ./coverage.xml
+ flags: unittests
+ env_vars: OS,PYTHON
+ name: codecov-umbrella
+ fail_ci_if_error: false
+
+ build_cu102:
+ runs-on: ubuntu-18.04
+ container:
+ image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
+ strategy:
+ matrix:
+ python-version: [3.7]
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Upgrade pip
+ run: pip install pip --upgrade
+ - name: Fetch GPG keys
+ run: |
+ apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+ apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+ - name: Install Python-dev
+ run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
+ if: ${{matrix.python-version != 3.9}}
+ - name: Install system dependencies
+ run: |
+ apt-get update
+ apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev
+ - name: Install mmseg dependencies
+ run: |
+ python -V
+ pip install -U openmim
+ pip install git+https://github.com/open-mmlab/mmengine.git
+ mim install 'mmcv>=2.0.0rc1'
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ - name: Install unittest dependencies
+ run: pip install -r requirements/tests.txt -r requirements/optional.txt
+ - name: Build and install
+ run: |
+ python setup.py check -m -s
+ TORCH_CUDA_ARCH_LIST=7.0 pip install -e .
+ build_windows:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [windows-2022]
+ python: [3.7]
+ platform: [cpu, cu111]
+ steps:
+ - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python }}
+ - name: Upgrade pip
+ run: python -m pip install pip --upgrade
+ - name: Install OpenCV
+        run: pip install "opencv-python>=3"
+ - name: Install PyTorch
+ run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
+ - name: Install mmseg dependencies
+ run: |
+ python -V
+ pip install -U openmim
+ pip install git+https://github.com/open-mmlab/mmengine.git
+ mim install 'mmcv>=2.0.0rc1'
+ pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+ - name: Install unittest dependencies
+ run: pip install -r requirements/tests.txt -r requirements/optional.txt
+ - name: Build and install
+ run: pip install -e .
+ - name: Run unittests
+ run: |
+ pip install timm
+          coverage run --branch --source mmseg -m pytest tests/ --ignore tests\test_models\test_forward.py --ignore tests\test_models\test_backbones\test_beit.py
+ - name: Generate coverage report
+ run: |
+ coverage xml
+ coverage report -m
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 898c091a1..34b120968 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://gitlab.com/pycqa/flake8.git
- rev: 3.8.3
+ rev: 5.0.4
hooks:
- id: flake8
- repo: https://github.com/PyCQA/isort
@@ -8,11 +8,11 @@ repos:
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
- rev: v0.30.0
+ rev: v0.32.0
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v3.1.0
+ rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: check-yaml
@@ -34,7 +34,7 @@ repos:
- mdformat_frontmatter
- linkify-it-py
- repo: https://github.com/codespell-project/codespell
- rev: v2.1.0
+ rev: v2.2.1
hooks:
- id: codespell
- repo: https://github.com/myint/docformatter
@@ -52,6 +52,11 @@ repos:
language: python
files: ^configs/.*\.md$
require_serial: true
+ - repo: https://github.com/asottile/pyupgrade
+ rev: v3.0.0
+ hooks:
+ - id: pyupgrade
+ args: ["--py36-plus"]
- repo: https://github.com/open-mmlab/pre-commit-hooks
rev: v0.2.0 # Use the rev to fix revision
hooks:
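The new `pyupgrade` hook (pinned with `--py36-plus`) is what drives the mechanical rewrites in the `.dev/` scripts earlier in this diff; a minimal before/after sketch of its two most visible rules:

```python
# before: pre-3.6 idioms
final_file = 'iter_{}.pth'.format(final_iter)
with open(log_json_path, 'r') as f:
    lines = f.readlines()

# after `pyupgrade --py36-plus`: f-strings, and the redundant
# default mode 'r' dropped from open()
final_file = f'iter_{final_iter}.pth'
with open(log_json_path) as f:
    lines = f.readlines()
```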
diff --git a/README.md b/README.md
index ae169d3fb..8a0bc52ec 100644
--- a/README.md
+++ b/README.md
@@ -62,12 +62,11 @@ The 1.x branch works with **PyTorch 1.6+**.
## What's New
-v1.0.0rc0 was released in 31/8/2022.
+v1.0.0rc1 was released on 2022/11/2.
Please refer to [changelog.md](docs/en/notes/changelog.md) for details and release history.
-- Unifies interfaces of all components based on MMEngine.
-- Faster training and testing speed with complete support of mixed precision training.
-- Refactored and more flexible architecture.
+- Support PoolFormer ([#2191](https://github.com/open-mmlab/mmsegmentation/pull/2191))
+- Add Decathlon dataset ([#2227](https://github.com/open-mmlab/mmsegmentation/pull/2227))
## Installation
@@ -102,6 +101,7 @@ Supported backbones:
- [x] [BEiT (ICLR'2022)](configs/beit)
- [x] [ConvNeXt (CVPR'2022)](configs/convnext)
- [x] [MAE (CVPR'2022)](configs/mae)
+- [x] [PoolFormer (CVPR'2022)](configs/poolformer)
Supported methods:
@@ -198,6 +198,7 @@ This project is released under the [Apache 2.0 license](LICENSE).
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
+- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark.
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 72ba0b0d0..975fca4ee 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -61,7 +61,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
## 更新日志
-最新版本 v1.0.0rc0 在 2022.8.31 发布。
+最新版本 v1.0.0rc1 在 2022.11.2 发布。
如果想了解更多版本更新细节和历史信息,请阅读[更新日志](docs/en/notes/changelog.md)。
## 安装
@@ -96,6 +96,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O
- [x] [BEiT (ICLR'2022)](configs/beit)
- [x] [ConvNeXt (CVPR'2022)](configs/convnext)
- [x] [MAE (CVPR'2022)](configs/mae)
+- [x] [PoolFormer (CVPR'2022)](configs/poolformer)
已支持的算法:
@@ -189,6 +190,7 @@ MMSegmentation 是一个由来自不同高校和企业的研发人员共同参
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准
+- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO 系列工具箱与测试基准
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱
diff --git a/configs/_base_/models/fpn_poolformer_s12.py b/configs/_base_/models/fpn_poolformer_s12.py
new file mode 100644
index 000000000..483d82330
--- /dev/null
+++ b/configs/_base_/models/fpn_poolformer_s12.py
@@ -0,0 +1,50 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa
+custom_imports = dict(imports='mmcls.models', allow_failed_imports=False)
+data_preprocessor = dict(
+ type='SegDataPreProcessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_val=0,
+ seg_pad_val=255)
+model = dict(
+ type='EncoderDecoder',
+ data_preprocessor=data_preprocessor,
+ backbone=dict(
+ type='mmcls.PoolFormer',
+ arch='s12',
+ init_cfg=dict(
+ type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'),
+ in_patch_size=7,
+ in_stride=4,
+ in_pad=2,
+ down_patch_size=3,
+ down_stride=2,
+ down_pad=1,
+ drop_rate=0.,
+ drop_path_rate=0.,
+ out_indices=(0, 2, 4, 6),
+ frozen_stages=0,
+ ),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=4),
+ decode_head=dict(
+ type='FPNHead',
+ in_channels=[256, 256, 256, 256],
+ in_index=[0, 1, 2, 3],
+ feature_strides=[4, 8, 16, 32],
+ channels=128,
+ dropout_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(),
+ test_cfg=dict(mode='whole'))
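Note that this base model keeps ResNet-style FPN `in_channels=[256, 512, 1024, 2048]`; the training configs below override them with PoolFormer's actual stage widths, the s12 config for the s-series and the m36/m48 configs for the m-series:

```python
# s-series stage widths (see fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py)
model = dict(neck=dict(in_channels=[64, 128, 320, 512]))
# m-series stage widths (see the m36/m48 configs)
# model = dict(neck=dict(in_channels=[96, 192, 384, 768]))
```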
diff --git a/configs/deeplabv3/README.md b/configs/deeplabv3/README.md
index 516ad9dfc..4c80ffaf9 100644
--- a/configs/deeplabv3/README.md
+++ b/configs/deeplabv3/README.md
@@ -55,7 +55,7 @@ In this work, we revisit atrous convolution, a powerful tool to explicitly adjus
| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | 76.26 | 77.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) |
| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | 79.63 | 80.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) |
| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 | 80.01 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) |
-| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 76.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) |
+| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 75.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) |
| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | 78.80 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) |
| DeepLabV3 | R-101b-D8 | 769x769 | 80000 | 10.7 | 0.82 | 79.41 | 80.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) |
diff --git a/configs/deeplabv3/deeplabv3.yml b/configs/deeplabv3/deeplabv3.yml
index 0bc615d20..619621299 100644
--- a/configs/deeplabv3/deeplabv3.yml
+++ b/configs/deeplabv3/deeplabv3.yml
@@ -326,7 +326,7 @@ Models:
- Task: Semantic Segmentation
Dataset: Cityscapes
Metrics:
- mIoU: 76.63
+ mIoU: 75.63
mIoU(ms+flip): 77.51
Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py
index 436ba4a14..ece9b0bf8 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py
@@ -7,6 +7,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py
index 30dd88253..86eec0d94 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py
@@ -7,6 +7,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py
index f0fd513bd..195046edc 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py
@@ -9,6 +9,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320, c1_in_channels=24),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py
index 17fb52a7c..d4f669f16 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py
@@ -7,6 +7,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320, c1_in_channels=24),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py
index 1453adb8e..0829f438a 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py
@@ -7,6 +7,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py
index 64e715ca9..015fa6f20 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py
@@ -7,6 +7,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py
index da44ef8ef..8542e0266 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py
@@ -7,6 +7,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py
index 17a4d211e..73db59bea 100644
--- a/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py
+++ b/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py
@@ -7,6 +7,7 @@ model = dict(
widen_factor=1.,
strides=(1, 2, 2, 1, 1, 1, 1),
dilations=(1, 1, 1, 2, 2, 4, 4),
- out_indices=(1, 2, 4, 6)),
+ out_indices=(1, 2, 4, 6),
+ norm_cfg=dict(type='SyncBN', requires_grad=True)),
decode_head=dict(in_channels=320),
auxiliary_head=dict(in_channels=96))
diff --git a/configs/poolformer/README.md b/configs/poolformer/README.md
new file mode 100644
index 000000000..3bdd2ba3f
--- /dev/null
+++ b/configs/poolformer/README.md
@@ -0,0 +1,65 @@
+# PoolFormer
+
+[MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418)
+
+## Introduction
+
+
+
+[Official Repo](https://github.com/sail-sg/poolformer)
+
+Code Snippet
+
+## Abstract
+
+
+
+Transformers have shown great potential in computer vision tasks. A common belief is their attention-based token mixer module contributes most to their competence. However, recent works show the attention-based module in transformers can be replaced by spatial MLPs and the resulted models still perform quite well. Based on this observation, we hypothesize that the general architecture of the transformers, instead of the specific token mixer module, is more essential to the model's performance. To verify this, we deliberately replace the attention module in transformers with an embarrassingly simple spatial pooling operator to conduct only the most basic token mixing. Surprisingly, we observe that the derived model, termed as PoolFormer, achieves competitive performance on multiple computer vision tasks. For example, on ImageNet-1K, PoolFormer achieves 82.1% top-1 accuracy, surpassing well-tuned vision transformer/MLP-like baselines DeiT-B/ResMLP-B24 by 0.3%/1.1% accuracy with 35%/52% fewer parameters and 48%/60% fewer MACs. The effectiveness of PoolFormer verifies our hypothesis and urges us to initiate the concept of "MetaFormer", a general architecture abstracted from transformers without specifying the token mixer. Based on the extensive experiments, we argue that MetaFormer is the key player in achieving superior results for recent transformer and MLP-like models on vision tasks. This work calls for more future research dedicated to improving MetaFormer instead of focusing on the token mixer modules. Additionally, our proposed PoolFormer could serve as a starting baseline for future MetaFormer architecture design. Code is available at [this https URL](https://github.com/sail-sg/poolformer)
+
+
+
+
+

+
+
+## Citation
+
+```bibtex
+@inproceedings{yu2022metaformer,
+ title={Metaformer is actually what you need for vision},
+ author={Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10819--10829},
+ year={2022}
+}
+```
+
+### Usage
+
+- The PoolFormer backbone requires [MMClassification](https://github.com/open-mmlab/mmclassification) to be installed first, as it provides abundant backbones for downstream tasks:
+
+```shell
+pip install "mmcls>=1.0.0rc0"
+```
+
+- The pretrained models can also be downloaded from the [PoolFormer config of MMClassification](https://github.com/open-mmlab/mmclassification/tree/master/configs/poolformer).
+
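+A minimal sketch of how the backbone is wired in (mirroring `configs/_base_/models/fpn_poolformer_s12.py` from this PR): the MMClassification registry is imported via `custom_imports`, and the backbone is referenced with the `mmcls.` prefix:
+
+```python
+checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth'  # noqa
+custom_imports = dict(imports='mmcls.models', allow_failed_imports=False)
+model = dict(
+    backbone=dict(
+        type='mmcls.PoolFormer',  # resolved from the mmcls model registry
+        arch='s12',
+        init_cfg=dict(
+            type='Pretrained', checkpoint=checkpoint_file,
+            prefix='backbone.')))
+```
+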
+## Results and models
+
+### ADE20K
+
+| Method | Backbone | Crop Size | pretrain | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | mIoU\* | mIoU\*(ms+flip) | config | download |
+| ------ | -------------- | --------- | ----------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------ | --------------: | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| FPN | PoolFormer-S12 | 512x512 | ImageNet-1K | 32 | 40000 | 4.17 | 23.48 | 36.68 | - | 37.07 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154.log.json) |
+| FPN | PoolFormer-S24 | 512x512 | ImageNet-1K | 32 | 40000 | 5.47 | 15.74 | 40.12 | - | 40.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049.log.json) |
+| FPN | PoolFormer-S36 | 512x512 | ImageNet-1K | 32 | 40000 | 6.77 | 11.34 | 41.61 | - | 41.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122-b47e607d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122.log.json) |
+| FPN | PoolFormer-M36 | 512x512 | ImageNet-1K | 32 | 40000 | 8.59 | 8.97 | 41.95 | - | 42.35 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230.log.json) |
+| FPN | PoolFormer-M48 | 512x512 | ImageNet-1K | 32 | 40000 | 10.48 | 6.69 | 42.43 | - | 42.76 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923.log.json) |
+
+Note:
+
+- We replace `AlignedResize` in the original PoolFormer implementation with `Resize + ResizeToMultiple` (see the sketch at the end of these notes).
+
+- `mIoU` marked with \* is collected when `Resize + ResizeToMultiple` is adopted in the `test_pipeline`, as is the `mIoU` reported in the training logs.
+
+- Test-time augmentation, i.e. "ms+flip", is still under development in MMSegmentation v1.x; stay tuned!
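+
+A sketch of that test pipeline, taken from `fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py` in this PR:
+
+```python
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+    # resize to the nearest multiple of 32 so strided stages divide evenly
+    dict(type='ResizeToMultiple', size_divisor=32),
+    # annotations are loaded after ``Resize``: the ground truth is not resized
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='PackSegInputs')
+]
+```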
diff --git a/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py
new file mode 100644
index 000000000..4100eb992
--- /dev/null
+++ b/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py
@@ -0,0 +1,11 @@
+_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
+checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m36_3rdparty_32xb128_in1k_20220414-c55e0949.pth' # noqa
+
+# model settings
+model = dict(
+ backbone=dict(
+ arch='m36',
+ init_cfg=dict(
+ type='Pretrained', checkpoint=checkpoint_file,
+ prefix='backbone.')),
+ neck=dict(in_channels=[96, 192, 384, 768]))
diff --git a/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py
new file mode 100644
index 000000000..cfc49ccbd
--- /dev/null
+++ b/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py
@@ -0,0 +1,11 @@
+_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
+checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m48_3rdparty_32xb128_in1k_20220414-9378f3eb.pth' # noqa
+
+# model settings
+model = dict(
+ backbone=dict(
+ arch='m48',
+ init_cfg=dict(
+ type='Pretrained', checkpoint=checkpoint_file,
+ prefix='backbone.')),
+ neck=dict(in_channels=[96, 192, 384, 768]))
diff --git a/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py
new file mode 100644
index 000000000..c0b15312f
--- /dev/null
+++ b/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py
@@ -0,0 +1,91 @@
+_base_ = [
+ '../_base_/models/fpn_poolformer_s12.py', '../_base_/default_runtime.py',
+ '../_base_/schedules/schedule_40k.py'
+]
+
+# dataset settings
+dataset_type = 'ADE20KDataset'
+data_root = 'data/ade/ADEChallengeData2016'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 512)
+data_preprocessor = dict(size=crop_size)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', reduce_zero_label=True),
+ dict(
+ type='RandomResize',
+ scale=(2048, 512),
+ ratio_range=(0.5, 2.0),
+ keep_ratio=True),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='PackSegInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+ dict(type='ResizeToMultiple', size_divisor=32),
+    # add loading annotations after ``Resize`` because the ground
+    # truth does not need the resize data transform
+ dict(type='LoadAnnotations', reduce_zero_label=True),
+ dict(type='PackSegInputs')
+]
+
+train_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='InfiniteSampler', shuffle=True),
+ dataset=dict(
+ type='RepeatDataset',
+ times=50,
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_prefix=dict(
+ img_path='images/training',
+ seg_map_path='annotations/training'),
+ pipeline=train_pipeline)))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_prefix=dict(
+ img_path='images/validation',
+ seg_map_path='annotations/validation'),
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
+
+# model settings
+model = dict(
+ data_preprocessor=data_preprocessor,
+ neck=dict(in_channels=[64, 128, 320, 512]),
+ decode_head=dict(num_classes=150))
+
+# optimizer
+# optimizer = dict(_delete_=True, type='AdamW', lr=0.0002, weight_decay=0.0001)
+# optimizer_config = dict()
+# # learning policy
+# lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)
+optim_wrapper = dict(
+ _delete_=True,
+ type='AmpOptimWrapper',
+ optimizer=dict(type='AdamW', lr=0.0002, weight_decay=0.0001))
+param_scheduler = [
+ dict(
+ type='PolyLR',
+ power=0.9,
+ begin=0,
+ end=40000,
+ eta_min=0.0,
+ by_epoch=False,
+ )
+]
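The `AmpOptimWrapper` above enables mixed-precision training in the 1.x config style, replacing the commented-out 0.x `optimizer`/`lr_config` fields kept for reference. For plain FP32 training, a sketch (an assumption, not part of this PR) would swap in mmengine's default wrapper:

```python
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',  # mmengine's default, non-AMP optimizer wrapper
    optimizer=dict(type='AdamW', lr=0.0002, weight_decay=0.0001))
```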
diff --git a/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py
new file mode 100644
index 000000000..1f9d24cd4
--- /dev/null
+++ b/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py
@@ -0,0 +1,9 @@
+_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
+checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s24_3rdparty_32xb128_in1k_20220414-d7055904.pth' # noqa
+# model settings
+model = dict(
+ backbone=dict(
+ arch='s24',
+ init_cfg=dict(
+ type='Pretrained', checkpoint=checkpoint_file,
+ prefix='backbone.')))
diff --git a/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py b/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py
new file mode 100644
index 000000000..231dcf6c2
--- /dev/null
+++ b/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py
@@ -0,0 +1,10 @@
+_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
+checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s36_3rdparty_32xb128_in1k_20220414-d78ff3e8.pth' # noqa
+
+# model settings
+model = dict(
+ backbone=dict(
+ arch='s36',
+ init_cfg=dict(
+ type='Pretrained', checkpoint=checkpoint_file,
+ prefix='backbone.')))
diff --git a/configs/poolformer/poolformer.yml b/configs/poolformer/poolformer.yml
new file mode 100644
index 000000000..fa5fc3012
--- /dev/null
+++ b/configs/poolformer/poolformer.yml
@@ -0,0 +1,106 @@
+Models:
+- Name: fpn_poolformer_s12_8xb4-40k_ade20k-512x512
+ In Collection: FPN
+ Metadata:
+ backbone: PoolFormer-S12
+ crop size: (512,512)
+ lr schd: 40000
+ inference time (ms/im):
+ - value: 42.59
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (512,512)
+ Training Memory (GB): 4.17
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: ADE20K
+ Metrics:
+ mIoU: 36.68
+ Config: configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth
+- Name: fpn_poolformer_s24_8xb4-40k_ade20k-512x512
+ In Collection: FPN
+ Metadata:
+ backbone: PoolFormer-S24
+ crop size: (512,512)
+ lr schd: 40000
+ inference time (ms/im):
+ - value: 63.53
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (512,512)
+ Training Memory (GB): 5.47
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: ADE20K
+ Metrics:
+ mIoU: 40.12
+ Config: configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth
+- Name: fpn_poolformer_s36_8x4_512x512_40k_ade20k
+ In Collection: FPN
+ Metadata:
+ backbone: PoolFormer-S36
+ crop size: (512,512)
+ lr schd: 40000
+ inference time (ms/im):
+ - value: 88.18
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (512,512)
+ Training Memory (GB): 6.77
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: ADE20K
+ Metrics:
+ mIoU: 41.61
+    Config: configs/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k.py
+ Weights: ''
+- Name: fpn_poolformer_m36_8xb4-40k_ade20k-512x512
+ In Collection: FPN
+ Metadata:
+ backbone: PoolFormer-M36
+ crop size: (512,512)
+ lr schd: 40000
+ inference time (ms/im):
+ - value: 111.48
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (512,512)
+ Training Memory (GB): 8.59
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: ADE20K
+ Metrics:
+ mIoU: 41.95
+ Config: configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth
+- Name: fpn_poolformer_m48_8xb4-40k_ade20k-512x512
+ In Collection: FPN
+ Metadata:
+ backbone: PoolFormer-M48
+ crop size: (512,512)
+ lr schd: 40000
+ inference time (ms/im):
+ - value: 149.48
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (512,512)
+ Training Memory (GB): 10.48
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: ADE20K
+ Metrics:
+ mIoU: 42.43
+ Config: configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth
diff --git a/configs/segformer/README.md b/configs/segformer/README.md
index 655c2e92a..be64099da 100644
--- a/configs/segformer/README.md
+++ b/configs/segformer/README.md
@@ -77,20 +77,13 @@ using `AlignedResize`, you can change the dataset pipeline like this:
```python
test_pipeline = [
dict(type='LoadImageFromFile'),
- dict(
- type='MultiScaleFlipAug',
- img_scale=(2048, 512),
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
- flip=False,
- transforms=[
- dict(type='Resize', keep_ratio=True),
- # resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU.
- dict(type='ResizeToMultiple', size_divisor=32),
- dict(type='RandomFlip'),
- dict(type='Normalize', **img_norm_cfg),
- dict(type='ImageToTensor', keys=['img']),
- dict(type='Collect', keys=['img']),
- ])
+ dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+  # resize image to a multiple of 32, which improves SegFormer by 0.5-1.0 mIoU.
+ dict(type='ResizeToMultiple', size_divisor=32),
+ # add loading annotation after ``Resize`` because ground truth
+ # does not need to do resize data transform
+ dict(type='LoadAnnotations', reduce_zero_label=True),
+ dict(type='PackSegInputs')
]
```
diff --git a/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py b/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py
index a31592557..dc1e4c898 100644
--- a/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py
+++ b/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py
@@ -1,4 +1,4 @@
-_base_ = './segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py'
+_base_ = './segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py'
model = dict(
decode_head=dict(
diff --git a/configs/setr/README.md b/configs/setr/README.md
index 3bae5d9e7..1aa3f245a 100644
--- a/configs/setr/README.md
+++ b/configs/setr/README.md
@@ -63,7 +63,7 @@ This script convert the model from `PRETRAIN_PATH` and store the converted model
| SETR Naive | ViT-L | 512x512 | 16 | 160000 | 18.40 | 4.72 | 48.28 | 49.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json) |
| SETR PUP | ViT-L | 512x512 | 16 | 160000 | 19.54 | 4.50 | 48.24 | 49.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json) |
| SETR MLA | ViT-L | 512x512 | 8 | 160000 | 10.96 | - | 47.34 | 49.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json) |
-| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | 47.54 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) |
+| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | 47.39 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) |
### Cityscapes
diff --git a/configs/setr/setr.yml b/configs/setr/setr.yml
index 1e87179ab..6a9987089 100644
--- a/configs/setr/setr.yml
+++ b/configs/setr/setr.yml
@@ -92,7 +92,7 @@ Models:
- Task: Semantic Segmentation
Dataset: ADE20K
Metrics:
- mIoU: 47.54
+ mIoU: 47.39
mIoU(ms+flip): 49.37
Config: configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth
diff --git a/configs/swin/README.md b/configs/swin/README.md
index 55d119d17..4ab20e80b 100644
--- a/configs/swin/README.md
+++ b/configs/swin/README.md
@@ -71,6 +71,6 @@ In our default setting, pretrained models and their corresponding [original mode
| UPerNet | Swin-T | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 5.02 | 21.06 | 44.41 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json) |
| UPerNet | Swin-S | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 6.17 | 14.72 | 47.72 | 49.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 7.61 | 12.65 | 47.99 | 49.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json) |
-| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | 50.31 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) |
+| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | 50.13 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-1K | 384x384 | 16 | 160000 | 8.52 | 12.10 | 48.35 | 49.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-22K | 384x384 | 16 | 160000 | - | - | 50.76 | 52.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json) |
diff --git a/configs/swin/swin.yml b/configs/swin/swin.yml
index aaf00ec05..783d839c8 100644
--- a/configs/swin/swin.yml
+++ b/configs/swin/swin.yml
@@ -75,7 +75,7 @@ Models:
- Task: Semantic Segmentation
Dataset: ADE20K
Metrics:
- mIoU: 50.31
+ mIoU: 50.13
mIoU(ms+flip): 51.9
Config: configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth
diff --git a/demo/MMSegmentation_Tutorial.ipynb b/demo/MMSegmentation_Tutorial.ipynb
index 4bcbfcba6..f679f997a 100644
--- a/demo/MMSegmentation_Tutorial.ipynb
+++ b/demo/MMSegmentation_Tutorial.ipynb
@@ -7,7 +7,7 @@
"id": "view-in-github"
},
"source": [
-        "<view-in-github Colab badge (HTML not recoverable)>"
+        "<view-in-github Colab badge (HTML not recoverable)>"
]
},
{
@@ -68,8 +68,12 @@
"source": [
"# Install PyTorch\n",
"!conda install pytorch=1.10.0 torchvision cudatoolkit=11.1 -c pytorch\n",
+ "# Install mim\n",
+ "!pip install -U openmim\n",
+ "# Install mmengine\n",
+ "!mim install mmengine\n",
"# Install MMCV\n",
- "!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10/index.html"
+ "!mim install 'mmcv >= 2.0.0rc1'"
]
},
{
@@ -85,7 +89,7 @@
"outputs": [],
"source": [
"!rm -rf mmsegmentation\n",
- "!git clone https://github.com/open-mmlab/mmsegmentation.git \n",
+ "!git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git \n",
"%cd mmsegmentation\n",
"!pip install -e ."
]
@@ -111,110 +115,15 @@
"print(mmseg.__version__)"
]
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "eUcuC3dUv32I"
- },
- "source": [
- "## Run Inference with MMSeg trained weight"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "2hd41IGaiNet",
- "outputId": "b7b2aafc-edf2-43e4-ea43-0b5dd0aa4b4a"
- },
- "outputs": [],
- "source": [
- "!mkdir checkpoints\n",
- "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P checkpoints"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "H8Fxg8i-wHJE"
- },
- "outputs": [],
- "source": [
- "from mmseg.apis import inference_model, init_model, show_result_pyplot\n",
- "from mmseg.utils import get_palette"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "umk8sJ0Xuace"
- },
- "outputs": [],
- "source": [
- "config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n",
- "checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "nWlQFuTgudxu",
- "outputId": "5e45f4f6-5bcf-4d04-bb9c-0428ee84a576"
- },
- "outputs": [],
- "source": [
- "# build the model from a config file and a checkpoint file\n",
- "model = init_model(config_file, checkpoint_file, device='cuda:0')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "izFv6pSRujk9"
- },
- "outputs": [],
- "source": [
- "# test a single image\n",
- "img = 'demo/demo.png'\n",
- "result = inference_model(model, img)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 504
- },
- "id": "bDcs9udgunQK",
- "outputId": "7c55f713-4085-47fd-fa06-720a321d0795"
- },
- "outputs": [],
- "source": [
- "# show the results\n",
- "show_result_pyplot(model, img, result, get_palette('cityscapes'))"
- ]
- },
{
"cell_type": "markdown",
"metadata": {
"id": "Ta51clKX4cwM"
},
"source": [
- "## Train a semantic segmentation model on a new dataset\n",
+ "## Finetune a semantic segmentation model on a new dataset\n",
"\n",
- "To train on a customized dataset, the following steps are necessary. \n",
+ "To finetune on a customized dataset, the following steps are necessary. \n",
"1. Add a new dataset class. \n",
"2. Create a config file accordingly. \n",
"3. Perform training and evaluation. "
@@ -268,8 +177,10 @@
"source": [
"# Let's take a look at the dataset\n",
"import mmcv\n",
+ "import mmengine\n",
"import matplotlib.pyplot as plt\n",
"\n",
+ "\n",
"img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
"plt.figure(figsize=(8, 6))\n",
"plt.imshow(mmcv.bgr2rgb(img))\n",
@@ -293,18 +204,30 @@
},
"outputs": [],
"source": [
- "import os.path as osp\n",
- "import numpy as np\n",
- "from PIL import Image\n",
- "# convert dataset annotation to semantic segmentation map\n",
+ "# define dataset root and directory for images and annotations\n",
"data_root = 'iccv09Data'\n",
"img_dir = 'images'\n",
"ann_dir = 'labels'\n",
- "# define class and plaette for better visualization\n",
+ "# define class and palette for better visualization\n",
"classes = ('sky', 'tree', 'road', 'grass', 'water', 'bldg', 'mntn', 'fg obj')\n",
"palette = [[128, 128, 128], [129, 127, 38], [120, 69, 125], [53, 125, 34], \n",
- " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]\n",
- "for file in mmcv.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
+ " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "WnGZfribFHCx"
+ },
+ "outputs": [],
+ "source": [
+ "import os.path as osp\n",
+ "import numpy as np\n",
+ "from PIL import Image\n",
+ "\n",
+ "# convert dataset annotation to semantic segmentation map\n",
+ "for file in mmengine.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
" seg_map = np.loadtxt(osp.join(data_root, ann_dir, file)).astype(np.uint8)\n",
" seg_img = Image.fromarray(seg_map).convert('P')\n",
" seg_img.putpalette(np.array(palette, dtype=np.uint8))\n",
@@ -351,8 +274,8 @@
"source": [
"# split train/val set randomly\n",
"split_dir = 'splits'\n",
- "mmcv.mkdir_or_exist(osp.join(data_root, split_dir))\n",
- "filename_list = [osp.splitext(filename)[0] for filename in mmcv.scandir(\n",
+ "mmengine.mkdir_or_exist(osp.join(data_root, split_dir))\n",
+ "filename_list = [osp.splitext(filename)[0] for filename in mmengine.scandir(\n",
" osp.join(data_root, ann_dir), suffix='.png')]\n",
"with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:\n",
" # select first 4/5 as train set\n",
@@ -380,18 +303,15 @@
},
"outputs": [],
"source": [
- "from mmseg.datasets.builder import DATASETS\n",
- "from mmseg.datasets.custom import BaseSegDataset\n",
+ "from mmseg.registry import DATASETS\n",
+ "from mmseg.datasets import BaseSegDataset\n",
+ "\n",
"\n",
"@DATASETS.register_module()\n",
"class StanfordBackgroundDataset(BaseSegDataset):\n",
- " CLASSES = classes\n",
- " PALETTE = palette\n",
- " def __init__(self, split, **kwargs):\n",
- " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', \n",
- " split=split, **kwargs)\n",
- " assert osp.exists(self.img_dir) and self.split is not None\n",
- "\n",
+    "    METAINFO = dict(classes=classes, palette=palette)\n",
+ " def __init__(self, **kwargs):\n",
+ " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)\n",
" "
]
},
@@ -405,6 +325,16 @@
"In the next step, we need to modify the config for the training. To accelerate the process, we finetune the model from trained weights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Download config and checkpoint files\n",
+ "!mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest ."
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -413,8 +343,9 @@
},
"outputs": [],
"source": [
- "from mmcv import Config\n",
- "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')"
+ "from mmengine import Config\n",
+ "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py')\n",
+ "print(f'Config:\\n{cfg.pretty_text}')"
]
},
{
@@ -438,10 +369,10 @@
},
"outputs": [],
"source": [
- "from mmseg.apis import set_random_seed\n",
- "\n",
"# Since we use only one GPU, BN is used instead of SyncBN\n",
"cfg.norm_cfg = dict(type='BN', requires_grad=True)\n",
+ "cfg.crop_size = (256, 256)\n",
+ "cfg.model.data_preprocessor.size = cfg.crop_size\n",
"cfg.model.backbone.norm_cfg = cfg.norm_cfg\n",
"cfg.model.decode_head.norm_cfg = cfg.norm_cfg\n",
"cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg\n",
@@ -453,79 +384,55 @@
"cfg.dataset_type = 'StanfordBackgroundDataset'\n",
"cfg.data_root = data_root\n",
"\n",
- "cfg.data.samples_per_gpu = 8\n",
- "cfg.data.workers_per_gpu=8\n",
+ "cfg.train_dataloader.batch_size = 8\n",
"\n",
- "cfg.img_norm_cfg = dict(\n",
- " mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n",
- "cfg.crop_size = (256, 256)\n",
"cfg.train_pipeline = [\n",
" dict(type='LoadImageFromFile'),\n",
" dict(type='LoadAnnotations'),\n",
- " dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),\n",
+ " dict(type='RandomResize', scale=(320, 240), ratio_range=(0.5, 2.0), keep_ratio=True),\n",
" dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),\n",
- " dict(type='RandomFlip', flip_ratio=0.5),\n",
- " dict(type='PhotoMetricDistortion'),\n",
- " dict(type='Normalize', **cfg.img_norm_cfg),\n",
- " dict(type='Pad', size=cfg.crop_size, pad_val=0, seg_pad_val=255),\n",
- " dict(type='DefaultFormatBundle'),\n",
- " dict(type='Collect', keys=['img', 'gt_semantic_seg']),\n",
+ " dict(type='RandomFlip', prob=0.5),\n",
+ " dict(type='PackSegInputs')\n",
"]\n",
"\n",
"cfg.test_pipeline = [\n",
" dict(type='LoadImageFromFile'),\n",
- " dict(\n",
- " type='MultiScaleFlipAug',\n",
- " img_scale=(320, 240),\n",
- " # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],\n",
- " flip=False,\n",
- " transforms=[\n",
- " dict(type='Resize', keep_ratio=True),\n",
- " dict(type='RandomFlip'),\n",
- " dict(type='Normalize', **cfg.img_norm_cfg),\n",
- " dict(type='ImageToTensor', keys=['img']),\n",
- " dict(type='Collect', keys=['img']),\n",
- " ])\n",
+ " dict(type='Resize', scale=(320, 240), keep_ratio=True),\n",
+ " # add loading annotation after ``Resize`` because ground truth\n",
+ " # does not need to do resize data transform\n",
+ " dict(type='LoadAnnotations'),\n",
+ " dict(type='PackSegInputs')\n",
"]\n",
"\n",
"\n",
- "cfg.data.train.type = cfg.dataset_type\n",
- "cfg.data.train.data_root = cfg.data_root\n",
- "cfg.data.train.img_dir = img_dir\n",
- "cfg.data.train.ann_dir = ann_dir\n",
- "cfg.data.train.pipeline = cfg.train_pipeline\n",
- "cfg.data.train.split = 'splits/train.txt'\n",
+ "cfg.train_dataloader.dataset.type = cfg.dataset_type\n",
+ "cfg.train_dataloader.dataset.data_root = cfg.data_root\n",
+ "cfg.train_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
+ "cfg.train_dataloader.dataset.pipeline = cfg.train_pipeline\n",
+ "cfg.train_dataloader.dataset.ann_file = 'splits/train.txt'\n",
"\n",
- "cfg.data.val.type = cfg.dataset_type\n",
- "cfg.data.val.data_root = cfg.data_root\n",
- "cfg.data.val.img_dir = img_dir\n",
- "cfg.data.val.ann_dir = ann_dir\n",
- "cfg.data.val.pipeline = cfg.test_pipeline\n",
- "cfg.data.val.split = 'splits/val.txt'\n",
+ "cfg.val_dataloader.dataset.type = cfg.dataset_type\n",
+ "cfg.val_dataloader.dataset.data_root = cfg.data_root\n",
+ "cfg.val_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
+ "cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline\n",
+ "cfg.val_dataloader.dataset.ann_file = 'splits/val.txt'\n",
"\n",
- "cfg.data.test.type = cfg.dataset_type\n",
- "cfg.data.test.data_root = cfg.data_root\n",
- "cfg.data.test.img_dir = img_dir\n",
- "cfg.data.test.ann_dir = ann_dir\n",
- "cfg.data.test.pipeline = cfg.test_pipeline\n",
- "cfg.data.test.split = 'splits/val.txt'\n",
+ "cfg.test_dataloader = cfg.val_dataloader\n",
"\n",
- "# We can still use the pre-trained Mask RCNN model though we do not need to\n",
- "# use the mask branch\n",
- "cfg.load_from = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
+ "\n",
+ "# Load the pretrained weights\n",
+ "cfg.load_from = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
"\n",
"# Set up working dir to save files and logs.\n",
"cfg.work_dir = './work_dirs/tutorial'\n",
"\n",
- "cfg.runner.max_iters = 200\n",
- "cfg.log_config.interval = 10\n",
- "cfg.evaluation.interval = 200\n",
- "cfg.checkpoint_config.interval = 200\n",
+ "cfg.train_cfg.max_iters = 200\n",
+ "cfg.train_cfg.val_interval = 200\n",
+ "cfg.default_hooks.logger.interval = 10\n",
+ "cfg.default_hooks.checkpoint.interval = 200\n",
"\n",
- "# Set seed to facitate reproducing the result\n",
- "cfg.seed = 0\n",
- "set_random_seed(0, deterministic=False)\n",
- "cfg.gpu_ids = range(1)\n",
+ "# Set seed to facilitate reproducing the result\n",
+ "cfg['randomness'] = dict(seed=0)\n",
"\n",
"# Let's have a look at the final config used for training\n",
"print(f'Config:\\n{cfg.pretty_text}')"
@@ -552,23 +459,23 @@
},
"outputs": [],
"source": [
- "from mmseg.datasets import build_dataset\n",
- "from mmseg.models import build_segmentor\n",
- "from mmseg.apis import train_segmentor\n",
+ "from mmengine.runner import Runner\n",
+ "from mmseg.utils import register_all_modules\n",
"\n",
- "\n",
- "# Build the dataset\n",
- "datasets = [build_dataset(cfg.data.train)]\n",
- "\n",
- "# Build the detector\n",
- "model = build_segmentor(cfg.model)\n",
- "# Add an attribute for visualization convenience\n",
- "model.CLASSES = datasets[0].CLASSES\n",
- "\n",
- "# Create work_dir\n",
- "mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n",
- "train_segmentor(model, datasets, cfg, distributed=False, validate=True, \n",
- " meta=dict())"
+ "# register all modules in mmseg into the registries\n",
+ "# do not init the default scope here because it will be init in the runner\n",
+ "register_all_modules(init_default_scope=False)\n",
+ "runner = Runner.from_cfg(cfg)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start training\n",
+ "runner.train()"
]
},
{
@@ -593,20 +500,17 @@
},
"outputs": [],
"source": [
- "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
+ "from mmseg.apis import inference_model, show_result_pyplot\n",
"\n",
- "model.cfg = cfg\n",
+    "model = runner.model\n",
+    "model.cfg = cfg\n",
+ "\n",
+ "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
"result = inference_model(model, img)\n",
"plt.figure(figsize=(8, 6))\n",
- "show_result_pyplot(model, img, result, palette)"
+ "vis_result = show_result_pyplot(model, img, result, palette)\n",
+ "plt.imshow(mmcv.bgr2rgb(vis_result))"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -618,7 +522,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3.7.13 ('pt1.12')",
"language": "python",
"name": "python3"
},
@@ -632,7 +536,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.0"
+ "version": "3.7.13"
},
"pycharm": {
"stem_cell": {
@@ -642,6 +546,11 @@
},
"source": []
}
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "ffdb7915c29738c259ec7ee5d0d1b9253c264f1fd267d45dd77f1a420396c120"
+ }
}
},
"nbformat": 4,
diff --git a/demo/inference_demo.ipynb b/demo/inference_demo.ipynb
index b557e9b2a..f05a94748 100644
--- a/demo/inference_demo.ipynb
+++ b/demo/inference_demo.ipynb
@@ -21,6 +21,8 @@
"outputs": [],
"source": [
"import torch\n",
+ "import mmcv\n",
+ "import matplotlib.pyplot as plt\n",
"from mmengine.model.utils import revert_sync_batchnorm\n",
"from mmseg.apis import init_model, inference_model, show_result_pyplot\n",
"from mmseg.utils import register_all_modules\n",
@@ -71,7 +73,8 @@
"outputs": [],
"source": [
"# show the results\n",
- "show_result_pyplot(model, img, result)"
+ "vis_result = show_result_pyplot(model, img, result)\n",
+ "plt.imshow(mmcv.bgr2rgb(vis_result))"
]
},
{
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 64482b472..56e70ebf4 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,6 +1,7 @@
ARG PYTORCH="1.11.0"
ARG CUDA="11.3"
ARG CUDNN="8"
+ARG MMCV="2.0.0rc1"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
@@ -22,10 +23,12 @@ RUN conda clean --all
ARG PYTORCH
ARG CUDA
ARG MMCV
-RUN ["/bin/bash", "-c", "pip install --no-cache-dir mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
+RUN ["/bin/bash", "-c", "pip install openmim"]
+RUN ["/bin/bash", "-c", "mim install mmengine"]
+RUN ["/bin/bash", "-c", "mim install mmcv==${MMCV}"]
# Install MMSegmentation
-RUN git clone https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation
+RUN git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation
WORKDIR /mmsegmentation
ENV FORCE_CUDA="1"
RUN pip install -r requirements.txt
diff --git a/docker/serve/Dockerfile b/docker/serve/Dockerfile
index c1d154528..e43baebd8 100644
--- a/docker/serve/Dockerfile
+++ b/docker/serve/Dockerfile
@@ -3,8 +3,8 @@ ARG CUDA="11.3"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
-ARG MMCV="1.4.8"
-ARG MMSEG="0.24.1"
+ARG MMCV="2.0.0rc1"
+ARG MMSEG="1.0.0rc1"
ENV PYTHONUNBUFFERED TRUE
@@ -26,7 +26,9 @@ RUN pip install torchserve torch-model-archiver
# MMLAB
ARG PYTORCH
ARG CUDA
-RUN ["/bin/bash", "-c", "pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
+RUN ["/bin/bash", "-c", "pip install openmim"]
+RUN ["/bin/bash", "-c", "mim install mmengine"]
+RUN ["/bin/bash", "-c", "mim install mmcv==${MMCV}"]
RUN pip install mmsegmentation==${MMSEG}
RUN useradd -m model-server \
diff --git a/docs/en/advanced_guides/add_models.md b/docs/en/advanced_guides/add_models.md
new file mode 100644
index 000000000..1f1969db3
--- /dev/null
+++ b/docs/en/advanced_guides/add_models.md
@@ -0,0 +1,260 @@
+# Add New Modules
+
+## Develop new components
+
+We can customize all the components introduced in [the model documentation](./models.md), such as **backbone**, **head**, **loss function** and **data preprocessor**.
+
+### Add new backbones
+
+Here we show how to develop a new backbone with an example of MobileNet.
+
+1. Create a new file `mmseg/models/backbones/mobilenet.py`.
+
+ ```python
+ import torch.nn as nn
+
+ from mmseg.registry import MODELS
+
+
+ @MODELS.register_module()
+ class MobileNet(nn.Module):
+
+ def __init__(self, arg1, arg2):
+ pass
+
+ def forward(self, x): # should return a tuple
+ pass
+
+ def init_weights(self, pretrained=None):
+ pass
+ ```
+
+2. Import the module in `mmseg/models/backbones/__init__.py`.
+
+ ```python
+ from .mobilenet import MobileNet
+ ```
+
+3. Use it in your config file.
+
+ ```python
+ model = dict(
+ ...
+ backbone=dict(
+ type='MobileNet',
+ arg1=xxx,
+ arg2=xxx),
+       ...)
+ ```
+
+### Add new heads
+
+In MMSegmentation, we provide a [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/decode_heads/decode_head.py#L17) for developing all segmentation heads.
+All newly implemented decode heads should be derived from it.
+Here we show how to develop a new head with [PSPNet](https://arxiv.org/abs/1612.01105) as an example.
+
+First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`.
+PSPNet implements a decode head for segmentation decoding.
+To implement a decode head, we need to implement the following three functions of the new module.
+
+```python
+from mmseg.registry import MODELS
+from .decode_head import BaseDecodeHead
+
+@MODELS.register_module()
+class PSPHead(BaseDecodeHead):
+
+ def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
+ super(PSPHead, self).__init__(**kwargs)
+
+ def init_weights(self):
+ pass
+
+ def forward(self, inputs):
+ pass
+```
+
+Next, users need to add the module in `mmseg/models/decode_heads/__init__.py` so that the corresponding registry can find and load it.
+
+The config file of PSPNet is as follows:
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
+ backbone=dict(
+ type='ResNetV1c',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ dilations=(1, 1, 2, 4),
+ strides=(1, 2, 1, 1),
+ norm_cfg=norm_cfg,
+ norm_eval=False,
+ style='pytorch',
+ contract_dilation=True),
+ decode_head=dict(
+ type='PSPHead',
+ in_channels=2048,
+ in_index=3,
+ channels=512,
+ pool_scales=(1, 2, 3, 6),
+ dropout_ratio=0.1,
+ num_classes=19,
+ norm_cfg=norm_cfg,
+ align_corners=False,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))
+
+```
+
+### Add new loss
+
+Assume you want to add a new loss called `MyLoss` for segmentation decoding.
+To add a new loss function, users need to implement it in `mmseg/models/losses/my_loss.py`.
+The decorator `weighted_loss` enables the loss to be weighted for each element.
+
+```python
+import torch
+import torch.nn as nn
+
+from mmseg.registry import MODELS
+from .utils import weighted_loss
+
+@weighted_loss
+def my_loss(pred, target):
+ assert pred.size() == target.size() and target.numel() > 0
+ loss = torch.abs(pred - target)
+ return loss
+
+@MODELS.register_module()
+class MyLoss(nn.Module):
+
+ def __init__(self, reduction='mean', loss_weight=1.0):
+ super(MyLoss, self).__init__()
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None):
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss = self.loss_weight * my_loss(
+ pred, target, weight, reduction=reduction, avg_factor=avg_factor)
+ return loss
+```
+
+Then users need to add it in `mmseg/models/losses/__init__.py`.
+
+```python
+from .my_loss import MyLoss, my_loss
+
+```
+
+To use it, modify the `loss_decode` field in the head config.
+`loss_weight` can be used to balance multiple losses.
+
+```python
+loss_decode=dict(type='MyLoss', loss_weight=1.0)
+```
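+
+Since `loss_decode` also accepts a list of loss configs, a sketch of combining the new loss with an existing one could look like this (the weights are illustrative):
+
+```python
+loss_decode=[
+    dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+    dict(type='MyLoss', loss_weight=0.5)
+]
+```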
+
+### Add new data preprocessor
+
+In MMSegmentation 1.x versions, we use [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/data_preprocessor.py#L13) by default to copy data to the target device and preprocess it into the model input format. Here we show how to develop a new data preprocessor.
+
+1. Create a new file `mmseg/models/my_datapreprocessor.py`.
+
+ ```python
+   from typing import Any, Dict
+
+   from mmengine.model import BaseDataPreprocessor
+
+ from mmseg.registry import MODELS
+
+ @MODELS.register_module()
+ class MyDataPreProcessor(BaseDataPreprocessor):
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+ def forward(self, data: dict, training: bool=False) -> Dict[str, Any]:
+ # TODO Define the logic for data pre-processing in the forward method
+ pass
+ ```
+
+2. Import your data preprocessor in `mmseg/models/__init__.py`
+
+ ```python
+ from .my_datapreprocessor import MyDataPreProcessor
+ ```
+
+3. Use it in your config file.
+
+ ```python
+ model = dict(
+       data_preprocessor=dict(type='MyDataPreProcessor'),
+ ...
+ )
+ ```
+
+## Develop new segmentors
+
+The segmentor is an algorithmic architecture in which users can customize their algorithms by adding customized components and defining the logic of algorithm execution. Please refer to [the model document](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/advanced_guides/models.md) for more details.
+
+Since the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/segmentors/base.py#L15) in MMSegmentation unifies three modes for the forward process, to develop a new segmentor, users need to override the `loss`, `predict` and `_forward` methods, which correspond to the `loss`, `predict` and `tensor` modes.
+
+Here we show how to develop a new segmentor.
+
+1. Create a new file `mmseg/models/segmentors/my_segmentor.py`.
+
+ ```python
+   from typing import List, Tuple
+
+   from torch import Tensor
+
+   from mmseg.registry import MODELS
+   from mmseg.models import BaseSegmentor
+   from mmseg.utils import OptSampleList, SampleList
+
+ @MODELS.register_module()
+ class MySegmentor(BaseSegmentor):
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ # TODO users should build components of the network here
+
+ def loss(self, inputs: Tensor, data_samples: SampleList) -> dict:
+ """Calculate losses from a batch of inputs and data samples."""
+ pass
+
+ def predict(self, inputs: Tensor, data_samples: OptSampleList=None) -> SampleList:
+ """Predict results from a batch of inputs and data samples with post-
+ processing."""
+ pass
+
+ def _forward(self,
+ inputs: Tensor,
+ data_samples: OptSampleList = None) -> Tuple[List[Tensor]]:
+ """Network forward process.
+
+ Usually includes backbone, neck and head forward without any post-
+ processing.
+ """
+ pass
+ ```
+
+2. Import your segmentor in `mmseg/models/segmentors/__init__.py`.
+
+ ```python
+ from .my_segmentor import MySegmentor
+ ```
+
+3. Use it in your config file.
+
+ ```python
+ model = dict(
+ type='MySegmentor'
+ ...
+ )
+ ```
diff --git a/docs/en/advanced_guides/add_modules.md b/docs/en/advanced_guides/add_modules.md
deleted file mode 100644
index b76a69a4e..000000000
--- a/docs/en/advanced_guides/add_modules.md
+++ /dev/null
@@ -1,234 +0,0 @@
-# Add New Modules
-
-## Customize optimizer
-
-Assume you want to add a optimizer named as `MyOptimizer`, which has arguments `a`, `b`, and `c`.
-You need to first implement the new optimizer in a file, e.g., in `mmseg/engine/optimizers/my_optimizer.py`:
-
-```python
-from mmcv.runner import OPTIMIZERS
-from torch.optim import Optimizer
-
-
-@OPTIMIZERS.register_module
-class MyOptimizer(Optimizer):
-
- def __init__(self, a, b, c)
-
-```
-
-Then add this module in `mmseg/engine/optimizers/__init__.py` thus the registry will
-find the new module and add it:
-
-```python
-from .my_optimizer import MyOptimizer
-```
-
-Then you can use `MyOptimizer` in `optimizer` field of config files.
-In the configs, the optimizers are defined by the field `optimizer` like the following:
-
-```python
-optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-```
-
-To use your own optimizer, the field can be changed as
-
-```python
-optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
-```
-
-We already support to use all the optimizers implemented by PyTorch, and the only modification is to change the `optimizer` field of config files.
-For example, if you want to use `ADAM`, though the performance will drop a lot, the modification could be as the following.
-
-```python
-optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
-```
-
-The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
-
-## Customize optimizer constructor
-
-Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNoarm layers.
-The users can do those fine-grained parameter tuning through customizing optimizer constructor.
-
-```python
-from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS
-from .cocktail_optimizer import CocktailOptimizer
-
-
-@OPTIM_WRAPPER_CONSTRUCTORS.register_module
-class CocktailOptimizerConstructor(object):
-
- def __init__(self, optim_wrapper_cfg, paramwise_cfg=None):
-
- def __call__(self, model):
-
- return my_optimizer
-
-```
-
-## Develop new components
-
-There are mainly 2 types of components in MMSegmentation.
-
-- backbone: usually stacks of convolutional network to extract feature maps, e.g., ResNet, HRNet.
-- head: the component for semantic segmentation map decoding.
-
-### Add new backbones
-
-Here we show how to develop new components with an example of MobileNet.
-
-1. Create a new file `mmseg/models/backbones/mobilenet.py`.
-
-```python
-import torch.nn as nn
-
-from mmseg.registry import MODELS
-
-
-@MODELS.register_module
-class MobileNet(nn.Module):
-
- def __init__(self, arg1, arg2):
- pass
-
- def forward(self, x): # should return a tuple
- pass
-
- def init_weights(self, pretrained=None):
- pass
-```
-
-2. Import the module in `mmseg/models/backbones/__init__.py`.
-
-```python
-from .mobilenet import MobileNet
-```
-
-3. Use it in your config file.
-
-```python
-model = dict(
- ...
- backbone=dict(
- type='MobileNet',
- arg1=xxx,
- arg2=xxx),
- ...
-```
-
-### Add new heads
-
-In MMSegmentation, we provide a base [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py) for all segmentation head.
-All newly implemented decode heads should be derived from it.
-Here we show how to develop a new head with the example of [PSPNet](https://arxiv.org/abs/1612.01105) as the following.
-
-First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`.
-PSPNet implements a decode head for segmentation decode.
-To implement a decode head, basically we need to implement three functions of the new module as the following.
-
-```python
-from mmseg.registry import MODELS
-
-@MODELS.register_module()
-class PSPHead(BaseDecodeHead):
-
- def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
- super(PSPHead, self).__init__(**kwargs)
-
- def init_weights(self):
-
- def forward(self, inputs):
-
-```
-
-Next, the users need to add the module in the `mmseg/models/decode_heads/__init__.py` thus the corresponding registry could find and load them.
-
-To config file of PSPNet is as the following
-
-```python
-norm_cfg = dict(type='SyncBN', requires_grad=True)
-model = dict(
- type='EncoderDecoder',
- pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='PSPHead',
- in_channels=2048,
- in_index=3,
- channels=512,
- pool_scales=(1, 2, 3, 6),
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))
-
-```
-
-### Add new loss
-
-Assume you want to add a new loss as `MyLoss` for segmentation decode.
-To add a new loss function, the users need implement it in `mmseg/models/losses/my_loss.py`.
-The decorator `weighted_loss` enable the loss to be weighted for each element.
-
-```python
-import torch
-import torch.nn as nn
-
-from mmseg.registry import MODELS
-from .utils import weighted_loss
-
-@weighted_loss
-def my_loss(pred, target):
- assert pred.size() == target.size() and target.numel() > 0
- loss = torch.abs(pred - target)
- return loss
-
-@LOSSES.register_module
-class MyLoss(nn.Module):
-
- def __init__(self, reduction='mean', loss_weight=1.0):
- super(MyLoss, self).__init__()
- self.reduction = reduction
- self.loss_weight = loss_weight
-
- def forward(self,
- pred,
- target,
- weight=None,
- avg_factor=None,
- reduction_override=None):
- assert reduction_override in (None, 'none', 'mean', 'sum')
- reduction = (
- reduction_override if reduction_override else self.reduction)
- loss = self.loss_weight * my_loss(
- pred, target, weight, reduction=reduction, avg_factor=avg_factor)
- return loss
-```
-
-Then the users need to add it in the `mmseg/models/losses/__init__.py`.
-
-```python
-from .my_loss import MyLoss, my_loss
-
-```
-
-To use it, modify the `loss_xxx` field.
-Then you need to modify the `loss_decode` field in the head.
-`loss_weight` could be used to balance multiple losses.
-
-```python
-loss_decode=dict(type='MyLoss', loss_weight=1.0))
-```
diff --git a/docs/en/advanced_guides/evaluation.md b/docs/en/advanced_guides/evaluation.md
index b394c7690..55728281a 100644
--- a/docs/en/advanced_guides/evaluation.md
+++ b/docs/en/advanced_guides/evaluation.md
@@ -1 +1,158 @@
# Evaluation
+
+The evaluation procedure is executed in [ValLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L300) and [TestLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L373); users can evaluate model performance during training or with the test script by adding simple settings to the configuration file. The `ValLoop` and `TestLoop` are properties of [Runner](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L59) and are built the first time they are called. To build the `ValLoop` successfully, `val_dataloader` and `val_evaluator` must be set when building the `Runner`, since `dataloader` and `evaluator` are required parameters; the same goes for `TestLoop`. For more information about the Runner's design, please refer to the [documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md) of [MMEngine](https://github.com/open-mmlab/mmengine).
+
+(figure: test_step/val_step dataflow)
+
+By default, MMSegmentation writes the dataloader and metric settings in the dataset config files, and the configuration of the evaluation loop in the `schedule_x` config files.
+
+For example, in the ADE20K config file `configs/_base_/datasets/ade20k.py`, lines 37 to 48 configure the `val_dataloader`, and line 51 selects `IoUMetric` as the evaluator with `mIoU` as the metric:
+
+```python
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_prefix=dict(
+ img_path='images/validation',
+ seg_map_path='annotations/validation'),
+ pipeline=test_pipeline))
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+```
+
+For example, to evaluate the model during training, we add the evaluation configuration to the file `configs/_base_/schedules/schedule_40k.py` on lines 15 to 16:
+
+```python
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
+val_cfg = dict(type='ValLoop')
+```
+
+With the above two settings, MMSegmentation evaluates the **mIoU** metric of the model every 4000 iterations during the 40K-iteration training.
+
+If we would like to test the model after training, we need to add the `test_dataloader`, `test_evaluator` and `test_cfg` configs to the config file.
+
+```python
+test_dataloader = dict(
+ batch_size=1,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_prefix=dict(
+ img_path='images/validation',
+ seg_map_path='annotations/validation'),
+ pipeline=test_pipeline))
+
+test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_cfg = dict(type='TestLoop')
+```
+
+In MMSegmentation, the settings of `test_dataloader` and `test_evaluator` are the same as the `ValLoop`'s dataloader and evaluator by default; we can modify these settings to meet our needs.
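+
+With these settings in place, the test can then be launched with the test script; for example (the config and checkpoint paths are placeholders):
+
+```shell
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE}
+```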
+
+## IoUMetric
+
+MMSegmentation implements [IoUMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/iou_metric.py) and [CitysMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/citys_metric.py) for evaluating the performance of models, based on the [BaseMetric](https://github.com/open-mmlab/mmengine/blob/main/mmengine/evaluator/metric.py) provided by [MMEngine](https://github.com/open-mmlab/mmengine). Please refer to [the documentation](https://mmengine.readthedocs.io/en/latest/tutorials/evaluation.html) for more details about the unified evaluation interface.
+
+Here we briefly describe the arguments and the two main methods of `IoUMetric`.
+
+The constructor of `IoUMetric` has some additional parameters besides the base `collect_device` and `prefix`.
+
+The arguments of the constructor:
+
+- ignore_index (int) - Index that will be ignored in evaluation. Default: 255.
+- iou_metrics (list\[str\] | str) - Metrics to be calculated. The options include 'mIoU', 'mDice' and 'mFscore'.
+- nan_to_num (int, optional) - If specified, NaN values will be replaced by the numbers defined by the user. Default: None.
+- beta (int) - Determines the weight of recall in the combined score. Default: 1.
+- collect_device (str) - Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
+- prefix (str, optional) - The prefix that will be added in the metric names to disambiguate homonymous metrics of different evaluators. If the prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None.
+
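+For example, an evaluator that computes all three supported metrics could be configured as follows (a sketch; the argument values are illustrative):
+
+```python
+val_evaluator = dict(
+    type='IoUMetric',
+    iou_metrics=['mIoU', 'mDice', 'mFscore'],
+    nan_to_num=0,
+    beta=1)
+```
+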
+`IoUMetric` implements the IoU metric calculation; its two core methods are `process` and `compute_metrics`.
+
+- `process` method processes one batch of data and data_samples.
+- `compute_metrics` method computes the metrics from processed results.
+
+#### IoUMetric.process
+
+Parameters:
+
+- data_batch (Any) - A batch of data from the dataloader.
+- data_samples (Sequence\[dict\]) - A batch of outputs from the model.
+
+Returns:
+
+This method does not return anything; the processed results are stored in `self.results` and used to compute the metrics once all batches have been processed.
+
+#### IoUMetric.compute_metrics
+
+Parameters:
+
+- results (list) - The processed results of each batch.
+
+Returns:
+
+- Dict\[str, float\] - The computed metrics. The keys are the names of the metrics, and the values are corresponding results. The key mainly includes **aAcc**, **mIoU**, **mAcc**, **mDice**, **mFscore**, **mPrecision**, **mRecall**.
+
+## CitysMetric
+
+`CitysMetric` uses the official [CityscapesScripts](https://github.com/mcordts/cityscapesScripts) provided by Cityscapes to evaluate model performance.
+
+### Usage
+
+Before using it, please install the `cityscapesscripts` package first:
+
+```shell
+pip install cityscapesscripts
+```
+
+Since `IoUMetric` is used as the default evaluator in MMSegmentation, to use `CitysMetric` you need to customize the config file and overwrite the default evaluator as follows.
+
+```python
+val_evaluator = dict(type='CitysMetric', citys_metrics=['cityscapes'])
+test_evaluator = val_evaluator
+```
+
+### Interface
+
+The arguments of the constructor:
+
+- ignore_index (int) - Index that will be ignored in evaluation. Default: 255.
+- citys_metrics (list\[str\] | str) - Metrics to be evaluated. Default: \['cityscapes'\].
+- to_label_id (bool) - Whether to convert output to label_id for submission. Default: True.
+- suffix (str) - The filename prefix of the png files. If the prefix is "somepath/xxx", the png files will be named "somepath/xxx.png". Default: '.format_cityscapes'.
+- collect_device (str) - Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
+- prefix (str, optional) - The prefix that will be added in the metric names to disambiguate homonymous metrics of different evaluators. If the prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None.
+
+#### CitysMetric.process
+
+This method draws the masks on the images and saves the painted images to `work_dir`.
+
+Parameters:
+
+- data_batch (Any) - A batch of data from the dataloader.
+- data_samples (Sequence\[dict\]) - A batch of outputs from the model.
+
+Returns:
+
+This method does not return anything; the paths of the annotation files are stored in `self.results` and used to compute the metrics once all batches have been processed.
+
+#### CitysMetric.compute_metrics
+
+This method calls the `cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling` tool to calculate metrics.
+
+Parameters:
+
+- results (list) - Testing results of the dataset.
+
+Returns:
+
+- Dict\[str, float\] - Cityscapes evaluation results.
diff --git a/docs/en/advanced_guides/models.md b/docs/en/advanced_guides/models.md
index 91361720e..8202e95b7 100644
--- a/docs/en/advanced_guides/models.md
+++ b/docs/en/advanced_guides/models.md
@@ -1 +1,179 @@
# Models
+
+We usually define a neural network in a deep learning task as a model, and this model is the core of an algorithm. [MMEngine](https://github.com/open-mmlab/mmengine) abstracts a unified model [BaseModel](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/base_model/base_model.py#L16) to standardize the interfaces for training, testing and other processes. All models implemented in MMSegmentation inherit from `BaseModel`; on top of it, MMSegmentation implements the `forward` method and adds some features for semantic segmentation algorithms.
+
+## Common components
+
+### Segmentor
+
+In MMSegmentation, we abstract the network architecture as a **Segmentor**; it is a model that contains all components of a network. We have already implemented **EncoderDecoder** and **CascadeEncoderDecoder**, which typically consist of a **Data preprocessor**, **Backbone**, **Decode head** and **Auxiliary head**.
+
+### Data preprocessor
+
+**Data preprocessor** is the part that copies data to the target device and preprocesses the data into the model input format.
+
+### Backbone
+
+**Backbone** is the part that transforms an image to feature maps, such as a **ResNet-50** without the last fully connected layer.
+
+### Neck
+
+**Neck** is the part that connects the backbone and heads. It performs some refinements or reconfigurations on the raw feature maps produced by the backbone. An example is **Feature Pyramid Network (FPN)**.
+
+### Decode Head
+
+**Decode Head** is the part that transforms the feature maps into a segmentation mask, such as **PSPNet**.
+
+### Auxiliary head
+
+**Auxiliary head** is an optional component that transforms the feature maps into segmentation masks which are used only for computing auxiliary losses.
+
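+Putting these components together, a segmentor config has the following schematic shape (a sketch only; the concrete types are examples and each module takes more arguments than shown):
+
+```python
+model = dict(
+    type='EncoderDecoder',
+    data_preprocessor=dict(type='SegDataPreProcessor'),
+    backbone=dict(type='ResNetV1c', depth=50),
+    neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4),
+    decode_head=dict(type='FPNHead', num_classes=19),
+    auxiliary_head=dict(type='FCNHead', num_classes=19))
+```
+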
+## Basic interfaces
+
+MMSegmentation wraps `BaseModel` and implements the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/segmentors/base.py#L15) class, which mainly provides the interfaces `forward`, `train_step`, `val_step` and `test_step`. The following will introduce these interfaces in detail.
+
+### forward
+
+(figure: EncoderDecoder dataflow)
+
+(figure: CascadeEncoderDecoder dataflow)
+
+The `forward` method returns losses or predictions for training, validation, testing, and simple inference.
+
+The method should accept three modes: "tensor", "predict" and "loss":
+
+- "tensor": Forward the whole network and return the tensor or tuple of tensor without any post-processing, same as a common `nn.Module`.
+- "predict": Forward and return the predictions, which are fully processed to a list of `SegDataSample`.
+- "loss": Forward and return a `dict` of losses according to the given inputs and data samples.
+
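+The mode is selected by the caller; a sketch of the three calls (assuming `inputs` is a batched image tensor and `data_samples` a list of `SegDataSample`):
+
+```python
+feats = model(inputs, mode='tensor')                   # raw network outputs
+results = model(inputs, data_samples, mode='predict')  # list of SegDataSample
+losses = model(inputs, data_samples, mode='loss')      # dict of loss tensors
+```
+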
+**Note:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) is a data structure interface of MMSegmentation, it is used as an interface between different components. `SegDataSample` implements the abstract data element `mmengine.structures.BaseDataElement`, please refer to [the SegDataSample documentation](https://mmsegmentation.readthedocs.io/en/1.x/advanced_guides/structures.html) and [data element documentation](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/data_element.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information.
+
+Note that this method doesn't handle either backpropagation or optimizer updating, which are done in the method `train_step`.
+
+Parameters:
+
+- inputs (torch.Tensor) - The input tensor, generally with shape (N, C, ...).
+- data_samples (list\[[SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py)\]) - The seg data samples. They usually include information such as `metainfo` and `gt_sem_seg`. Defaults to None.
+- mode (str) - What kind of value to return. Defaults to 'tensor'.
+
+Returns:
+
+- `dict` or `list`:
+ - If `mode == "loss"`, return a `dict` of loss tensor used for backward and logging.
+  - If `mode == "predict"`, return a `list` of `SegDataSample`; the inference results are incrementally added to the `data_samples` parameter passed to the forward method, and each `SegDataSample` contains the following keys:
+ - pred_sem_seg (`PixelData`): Prediction of semantic segmentation.
+ - seg_logits (`PixelData`): Predicted logits of semantic segmentation before normalization.
+ - If `mode == "tensor"`, return a `tensor` or `tuple of tensor` or `dict` of `tensor` for custom use.
+
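+The three modes can be exercised directly on any built segmentor. Below is a minimal sketch, assuming `model` is an already-built segmentor (e.g. an `EncoderDecoder` instance) and `data_samples` is a list of `SegDataSample` carrying ground truth; the shapes are illustrative:
+
+```python
+import torch
+
+inputs = torch.rand(2, 3, 512, 512)                  # (N, C, H, W) batch
+feats = model(inputs, mode='tensor')                 # raw tensors, no post-processing
+losses = model(inputs, data_samples, mode='loss')    # dict of loss tensors
+preds = model(inputs, data_samples, mode='predict')  # list[SegDataSample]
+```
+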
+### prediction modes
+
+We briefly describe the fields of the model's configuration in [the config documentation](../user_guides/1_config.md); here we elaborate on the `model.test_cfg` field. `model.test_cfg` is used to control forward behavior: the `forward` method in `"predict"` mode can run in two modes:
+
+- `whole_inference`: If `cfg.model.test_cfg.mode == 'whole'`, the model will run inference on full images.
+
+  A `whole_inference` mode example config:
+
+  ```python
+  model = dict(
+      type='EncoderDecoder',
+      ...
+      test_cfg=dict(mode='whole')
+  )
+  ```
+
+- `slide_inference`: If `cfg.model.test_cfg.mode == 'slide'`, the model will run inference with a sliding window. **Note:** if you select the `slide` mode, `cfg.model.test_cfg.stride` and `cfg.model.test_cfg.crop_size` should also be specified.
+
+  A `slide_inference` mode example config:
+
+  ```python
+  model = dict(
+      type='EncoderDecoder',
+      ...
+      test_cfg=dict(mode='slide', crop_size=256, stride=170)
+  )
+  ```
+
+### train_step
+
+The `train_step` method calls the forward interface of the `loss` mode to get the loss `dict`. The `BaseModel` class implements the default model training process including preprocessing, model forward propagation, loss calculation, optimization, and back-propagation.
+
+Parameters:
+
+- data (dict or tuple or list) - Data sampled from the dataset. In MMSegmentation, the data dict contains two fields: `inputs` and `data_samples`.
+- optim_wrapper (OptimWrapper) - OptimWrapper instance used to update model parameters.
+
+**Note:** [OptimWrapper](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/optimizer_wrapper.py#L17) provides a common interface for updating parameters; please refer to the optimizer wrapper [documentation](https://mmengine.readthedocs.io/en/latest/tutorials/optim_wrapper.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information.
+
+Returns:
+
+- Dict\[str, `torch.Tensor`\]: A `dict` of tensor for logging.
+
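+A minimal sketch of one training iteration as the runner drives it (in practice MMEngine's `Runner` and its training loop do this for you; `model`, `optim_wrapper` and `train_dataloader` are assumed to be built already):
+
+```python
+for data in train_dataloader:
+    # train_step preprocesses `data`, runs forward(mode='loss'),
+    # then uses optim_wrapper to backward and update parameters.
+    log_vars = model.train_step(data, optim_wrapper)
+```
+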
+*(figure: train_step dataflow)*
+
+### val_step
+
+The `val_step` method calls the forward interface of the `predict` mode and returns the prediction result, which is further passed to the process interface of the evaluator and the `after_val_iter` interface of the Hook.
+
+Parameters:
+
+- data (`dict` or `tuple` or `list`) - Data sampled from the dataset. In MMSegmentation, the data dict contains two fields: `inputs` and `data_samples`.
+
+Returns:
+
+- `list` - The predictions of given data.
+
+*(figure: test_step/val_step dataflow)*
+
+### test_step
+
+`BaseModel` implements `test_step` identically to `val_step`.
+
+## Data Preprocessor
+
+The [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/data_preprocessor.py#L13) implemented by MMSegmentation inherits from the [BaseDataPreprocessor](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/base_model/data_preprocessor.py#L18) implemented by [MMEngine](https://github.com/open-mmlab/mmengine) and provides the functions of data preprocessing and copying data to the target device.
+
+The runner moves the model to the specified device during the construction stage, while the data is moved to the specified device by the [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/data_preprocessor.py#L13) in `train_step`, `val_step`, and `test_step`; the processed data is then passed to the model.
+
+The parameters of the `SegDataPreProcessor` constructor:
+
+- mean (Sequence\[Number\], optional) - The pixel mean of R, G, B channels. Defaults to None.
+- std (Sequence\[Number\], optional) - The pixel standard deviation of R, G, B channels. Defaults to None.
+- size (tuple, optional) - Fixed padding size.
+- size_divisor (int, optional) - The divisor of padded size.
+- pad_val (float, optional) - Padding value. Default: 0.
+- seg_pad_val (float, optional) - Padding value of segmentation map. Default: 255.
+- bgr_to_rgb (bool) - Whether to convert images from BGR to RGB. Defaults to False.
+- rgb_to_bgr (bool) - Whether to convert images from RGB to BGR. Defaults to False.
+- batch_augments (list\[dict\], optional) - Batch-level augmentations. Defaults to None.
+
+The data will be processed as follows:
+
+- Collate and move data to the target device.
+- Pad inputs to the input size with defined `pad_val`, and pad seg map with defined `seg_pad_val`.
+- Stack inputs to batch_inputs.
+- Convert inputs from BGR to RGB if `bgr_to_rgb` is enabled and the input has shape (3, H, W).
+- Normalize image with defined std and mean.
+- Do batch augmentations like Mixup and Cutmix during training.
+
+The parameters of the `forward` method:
+
+- data (dict) - Data sampled from the dataloader.
+- training (bool) - Whether to enable training time augmentation.
+
+The returns of the `forward` method:
+
+- Dict: Data in the same format as the model input.
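+
+A typical `data_preprocessor` configuration might look like the sketch below; the normalization values are the ImageNet statistics commonly used across MMSegmentation configs and are an illustrative assumption, not a requirement:
+
+```python
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    mean=[123.675, 116.28, 103.53],  # per-channel mean (R, G, B)
+    std=[58.395, 57.12, 57.375],     # per-channel std (R, G, B)
+    bgr_to_rgb=True,                 # images are loaded as BGR; convert to RGB
+    pad_val=0,                       # padding value for images
+    seg_pad_val=255)                 # padding value for segmentation maps
+```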
diff --git a/docs/en/advanced_guides/structures.md b/docs/en/advanced_guides/structures.md
index 985286177..2607242e2 100644
--- a/docs/en/advanced_guides/structures.md
+++ b/docs/en/advanced_guides/structures.md
@@ -1 +1,104 @@
# Structures
+
+To unify the input and output interfaces between different models and modules, OpenMMLab 2.0 MMEngine defines an abstract data structure.
+It implements the basic `Create`, `Read`, `Update` and `Delete` functions, supports transferring data between different devices, and supports tensor-like and dictionary-like operations such as `.cpu()`, `.cuda()`, `.get()` and `.detach()`.
+More details can be found [here](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/data_element.md).
+
+MMSegmentation also follows this interface protocol and defines `SegDataSample` which is used to encapsulate the data of semantic segmentation task.
+
+## Semantic Segmentation Data SegDataSample
+
+[SegDataSample](mmseg.structures.SegDataSample) includes three main fields, `gt_sem_seg`, `pred_sem_seg` and `seg_logits`, which store the annotation, the prediction result, and the raw (non-normalized) prediction, respectively.
+
+| Field        | Type                      | Description                                 |
+| ------------ | ------------------------- | ------------------------------------------- |
+| gt_sem_seg   | [`PixelData`](#pixeldata) | Annotation information.                     |
+| pred_sem_seg | [`PixelData`](#pixeldata) | The predicted result.                       |
+| seg_logits   | [`PixelData`](#pixeldata) | The raw (non-normalized) predicted result.  |
+
+The following sample code demonstrates the use of `SegDataSample`.
+
+```python
+import torch
+from mmengine.structures import PixelData
+from mmseg.structures import SegDataSample
+
+img_meta = dict(img_shape=(4, 4, 3),
+ pad_shape=(4, 4, 3))
+data_sample = SegDataSample()
+# define gt_segmentations to encapsulate the ground truth data
+gt_segmentations = PixelData(metainfo=img_meta)
+gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
+
+# add and process property in SegDataSample
+data_sample.gt_sem_seg = gt_segmentations
+assert 'gt_sem_seg' in data_sample
+assert 'data' in data_sample.gt_sem_seg
+assert 'img_shape' in data_sample.gt_sem_seg.metainfo_keys()
+print(data_sample.gt_sem_seg.shape)
+'''
+(4, 4)
+'''
+print(data_sample)
+'''
+<SegDataSample(
+    ...
+) at 0x1c2aae44d60>
+'''
+
+# delete and change property in SegDataSample
+data_sample = SegDataSample()
+gt_segmentations = PixelData(metainfo=img_meta)
+gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
+data_sample.gt_sem_seg = gt_segmentations
+data_sample.gt_sem_seg.set_metainfo(dict(img_shape=(4,4,9), pad_shape=(4,4,9)))
+del data_sample.gt_sem_seg.img_shape
+
+# Tensor-like operations
+data_sample = SegDataSample()
+gt_segmentations = PixelData(metainfo=img_meta)
+gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
+cuda_gt_segmentations = gt_segmentations.cuda()
+cuda_gt_segmentations = gt_segmentations.to('cuda:0')
+cpu_gt_segmentations = cuda_gt_segmentations.cpu()
+cpu_gt_segmentations = cuda_gt_segmentations.to('cpu')
+```
+
+## Customize New Property in SegDataSample
+
+If you want to customize a new property in `SegDataSample`, you may follow the [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) pattern below:
+
+```python
+class SegDataSample(BaseDataElement):
+ ...
+
+ @property
+ def xxx_property(self) -> xxxData:
+ return self._xxx_property
+
+ @xxx_property.setter
+ def xxx_property(self, value: xxxData) -> None:
+ self.set_field(value, '_xxx_property', dtype=xxxData)
+
+ @xxx_property.deleter
+ def xxx_property(self) -> None:
+ del self._xxx_property
+```
+
+Then a new property will be added to `SegDataSample`.
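+
+A concrete, runnable instance of this pattern; the `gt_edge_map` field name is purely illustrative:
+
+```python
+import torch
+from mmengine.structures import BaseDataElement, PixelData
+
+class MySegDataSample(BaseDataElement):
+    """Adds a custom `PixelData` field following the pattern above."""
+
+    @property
+    def gt_edge_map(self) -> PixelData:
+        return self._gt_edge_map
+
+    @gt_edge_map.setter
+    def gt_edge_map(self, value: PixelData) -> None:
+        self.set_field(value, '_gt_edge_map', dtype=PixelData)
+
+    @gt_edge_map.deleter
+    def gt_edge_map(self) -> None:
+        del self._gt_edge_map
+
+sample = MySegDataSample()
+sample.gt_edge_map = PixelData(data=torch.randint(0, 2, (1, 4, 4)))  # setter
+assert 'gt_edge_map' in sample                                       # field exists
+del sample.gt_edge_map                                               # deleter
+```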
diff --git a/docs/en/api.rst b/docs/en/api.rst
index 3da37ce6f..12ec13b2b 100644
--- a/docs/en/api.rst
+++ b/docs/en/api.rst
@@ -30,7 +30,7 @@ optimizers
:members:
mmseg.evaluation
---------------
+-----------------
metrics
^^^^^^^^^^
@@ -75,18 +75,13 @@ necks
.. automodule:: mmseg.models.necks
:members:
-mmseg.ops
---------------
-.. automodule:: mmseg.ops
- :members:
-
mmseg.registry
--------------
.. automodule:: mmseg.registry
:members:
mmseg.structures
---------------
+-----------------
structures
^^^^^^^^^^
@@ -104,6 +99,6 @@ mmseg.utils
:members:
mmseg.visualization
---------------
+----------------------
.. automodule:: mmseg.visualization
:members:
diff --git a/docs/en/conf.py b/docs/en/conf.py
index 87b16f266..e20aab14b 100644
--- a/docs/en/conf.py
+++ b/docs/en/conf.py
@@ -28,7 +28,7 @@ version_file = '../../mmseg/version.py'
def get_version():
- with open(version_file, 'r') as f:
+ with open(version_file) as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']
diff --git a/docs/en/get_started.md b/docs/en/get_started.md
index cac916da8..313501e0d 100644
--- a/docs/en/get_started.md
+++ b/docs/en/get_started.md
@@ -42,8 +42,8 @@ We recommend that users follow our best practices to install MMSegmentation. How
```shell
pip install -U openmim
-mim install 'mmcv>=2.0.0rc1'
mim install mmengine
+mim install "mmcv>=2.0.0rc1"
```
**Step 1.** Install MMSegmentation.
@@ -51,10 +51,8 @@ mim install mmengine
Case a: If you develop and run mmseg directly, install it from source:
```shell
-git clone https://github.com/open-mmlab/mmsegmentation.git
+git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git
cd mmsegmentation
-git checkout dev-1.x
-# branch 'dev-1.x' set up to track remote branch 'dev-1.x' from 'origin'.
pip install -v -e .
# '-v' means verbose, or more output
# '-e' means installing a project in editable mode,
@@ -64,7 +62,7 @@ pip install -v -e .
Case b: If you use mmsegmentation as a dependency or third-party package, install it with pip:
```shell
-pip install 'mmsegmentation>=1.0.0rc0'
+pip install "mmsegmentation>=1.0.0rc0"
```
### Verify the installation
@@ -159,8 +157,8 @@ thus we only need to install MMCV and MMSegmentation with the following commands
```shell
!pip3 install openmim
-!mim install 'mmcv>=2.0.0rc1'
!mim install mmengine
+!mim install "mmcv>=2.0.0rc1"
```
**Step 2.** Install MMSegmentation from the source.
diff --git a/docs/en/index.rst b/docs/en/index.rst
index 89648f9a0..63cfb924c 100644
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -24,7 +24,7 @@ Welcome to MMSegmentation's documentation!
:maxdepth: 1
:caption: Migration
- migration.md
+ migration/index.rst
.. toctree::
:caption: API Reference
diff --git a/docs/en/migration.md b/docs/en/migration.md
deleted file mode 100644
index ac49e6768..000000000
--- a/docs/en/migration.md
+++ /dev/null
@@ -1 +0,0 @@
-# Migration from MMSegmentation 0.x
diff --git a/docs/en/migration/index.rst b/docs/en/migration/index.rst
new file mode 100644
index 000000000..2843bdbcf
--- /dev/null
+++ b/docs/en/migration/index.rst
@@ -0,0 +1,8 @@
+Migration
+***************
+
+.. toctree::
+ :maxdepth: 1
+
+ interface.md
+ package.md
diff --git a/docs/en/migration/interface.md b/docs/en/migration/interface.md
new file mode 100644
index 000000000..f9a0b86f3
--- /dev/null
+++ b/docs/en/migration/interface.md
@@ -0,0 +1,441 @@
+# Migration from MMSegmentation 0.x
+
+## Introduction
+
+This guide describes the fundamental differences between MMSegmentation 0.x and MMSegmentation 1.x in terms of behaviors and APIs, and how these differences affect your migration.
+
+## New dependencies
+
+MMSegmentation 1.x depends on some new packages. You can prepare a new clean environment and install them according to the [installation tutorial](../get_started.md),
+or install the packages below manually.
+
+1. [MMEngine](https://github.com/open-mmlab/mmengine): MMEngine is the core of the OpenMMLab 2.0 architecture; we split many components unrelated to computer vision out of MMCV and into MMEngine.
+
+2. [MMCV](https://github.com/open-mmlab/mmcv): The computer vision package of OpenMMLab. This is not a new dependency, but you need to upgrade it to version **2.0.0rc1** or above.
+
+3. [MMClassification](https://github.com/open-mmlab/mmclassification) (optional): The image classification toolbox and benchmark of OpenMMLab. This is not a new dependency, but you need to upgrade it to version **1.0.0rc0** or above.
+
+## Train launch
+
+The main improvement of OpenMMLab 2.0 is the release of MMEngine, which provides a universal and powerful runner with unified interfaces to launch training jobs.
+
+Compared with MMSeg 0.x, MMSeg 1.x provides fewer command-line arguments in `tools/train.py`:
+
+| Function | Original | New |
+| :--- | :--- | :--- |
+| Loading pre-trained checkpoint | `--load_from=$CHECKPOINT` | `--cfg-options load_from=$CHECKPOINT` |
+| Resuming training from a specific checkpoint | `--resume-from=$CHECKPOINT` | `--resume=$CHECKPOINT` |
+| Resuming training from the latest checkpoint | `--auto-resume` | `--resume='auto'` |
+| Whether not to evaluate the checkpoint during training | `--no-validate` | `--cfg-options val_cfg=None val_dataloader=None val_evaluator=None` |
+| Training device assignment | `--gpu-id=$DEVICE_ID` | - |
+| Whether or not to set different seeds for different ranks | `--diff-seed` | `--cfg-options randomness.diff_rank_seed=True` |
+| Whether to set deterministic options for the CUDNN backend | `--deterministic` | `--cfg-options randomness.deterministic=True` |
+
+## Configuration file
+
+### Model settings
+
+No changes in `model.backbone`, `model.neck`, `model.decode_head` and `model.losses` fields.
+
+Add `model.data_preprocessor` field to configure the `DataPreProcessor`, including:
+
+- `mean` (Sequence, optional): The pixel mean of R, G, B channels. Defaults to None.
+
+- `std` (Sequence, optional): The pixel standard deviation of R, G, B channels. Defaults to None.
+
+- `size` (Sequence, optional): Fixed padding size.
+
+- `size_divisor` (int, optional): The divisor of padded size.
+
+- `seg_pad_val` (float, optional): Padding value of segmentation map. Default: 255.
+
+- `padding_mode` (str): Type of padding. Default: 'constant'.
+
+  - constant: pads with a constant value; this value is specified with `pad_val`.
+
+- `bgr_to_rgb` (bool): Whether to convert images from BGR to RGB. Defaults to False.
+
+- `rgb_to_bgr` (bool): Whether to convert images from RGB to BGR. Defaults to False.
+
+**Note:**
+Please refer to the [models documentation](../advanced_guides/models.md) for more details.
+
+### Dataset settings
+
+Changes in **data**:
+
+The original `data` field is split into `train_dataloader`, `val_dataloader` and `test_dataloader`. This allows us to configure them in a fine-grained way. For example, you can specify different samplers and batch sizes during training and test.
+The `samples_per_gpu` is renamed to `batch_size`.
+The `workers_per_gpu` is renamed to `num_workers`.
+
+**Original:**
+
+```python
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(...),
+    val=dict(...),
+    test=dict(...),
+)
+```
+
+**New:**
+
+```python
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    dataset=dict(...),
+    sampler=dict(type='DefaultSampler', shuffle=True)  # necessary
+)
+
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    dataset=dict(...),
+    sampler=dict(type='DefaultSampler', shuffle=False)  # necessary
+)
+
+test_dataloader = val_dataloader
+```
+
+Changes in **pipeline**:
+
+- The original formatting transforms **`ToTensor`**, **`ImageToTensor`** and **`Collect`** are combined into [`PackSegInputs`](mmseg.datasets.transforms.PackSegInputs).
+- We don't recommend doing **`Normalize`** and **`Pad`** in the dataset pipeline. Please remove them from pipelines and set them in the `data_preprocessor` field instead.
+- The original **`Resize`** of MMSeg 0.x corresponds to **`RandomResize`** in MMSeg 1.x; the input argument `img_scale` is renamed to `scale`, and the default value of `keep_ratio` is changed to False.
+
+**Note:**
+We moved some of the data-transform work, such as normalization, to the data preprocessor; see [the documentation](package.md) for more details.
+
+**Original:**
+
+```python
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+```
+
+**New:**
+
+```python
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(
+        type='RandomResize',
+        scale=(2560, 640),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+```
+
+Changes in **`evaluation`**:
+
+- The **`evaluation`** field is split into `val_evaluator` and `test_evaluator`, and it no longer supports the `interval` and `save_best` arguments.
+  The `interval` is moved to `train_cfg.val_interval`, and the `save_best`
+  is moved to `default_hooks.checkpoint.save_best`. `pre_eval` has been removed.
+- `'mIoU'` has been changed to `'IoUMetric'`.
+
+**Original:**
+
+```python
+evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
+```
+
+**New:**
+
+```python
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
+```
+
+### Optimizer and Schedule settings
+
+Changes in **`optimizer`** and **`optimizer_config`**:
+
+- Now we use the `optim_wrapper` field to specify all configuration of the optimization process, and
+  `optimizer` is now a sub-field of `optim_wrapper`.
+- `paramwise_cfg` is also a sub-field of `optim_wrapper`, instead of `optimizer`.
+- `optimizer_config` is removed, and all of its configurations are moved to `optim_wrapper`.
+- `grad_clip` is renamed to `clip_grad`.
+
+**Original:**
+
+```python
+optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.0005)
+optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
+```
+
+**New:**
+
+```python
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005),
+    clip_grad=dict(max_norm=1, norm_type=2))
+```
+
+Changes in **`lr_config`**:
+
+- The `lr_config` field is removed and replaced by the new `param_scheduler`.
+- The `warmup`-related arguments are removed, since we use a combination of schedulers to implement this
+  functionality.
+
+The new scheduler combination mechanism is very flexible, and you can use it to design many kinds of learning
+rate / momentum curves. See [the tutorial](TODO) for more details.
+
+**Original:**
+
+```python
+lr_config = dict(
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-6,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+```
+
+**New:**
+
+```python
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type='PolyLR',
+        power=1.0,
+        begin=1500,
+        end=160000,
+        eta_min=0.0,
+        by_epoch=False,
+    )
+]
+```
+
+Changes in **`runner`**:
+
+Most configuration in the original `runner` field is moved to `train_cfg`, `val_cfg` and `test_cfg`, which
+configure the loop in training, validation and test.
+
+**Original:**
+
+```python
+runner = dict(type='IterBasedRunner', max_iters=20000)
+```
+
+**New:**
+
+```python
+# The `val_interval` is the original `evaluation.interval`.
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
+val_cfg = dict(type='ValLoop')    # Use the default validation loop.
+test_cfg = dict(type='TestLoop')  # Use the default test loop.
+```
+
+In fact, in OpenMMLab 2.0, we introduced `Loop` to control the behaviors in training, validation and test. The functionalities of `Runner` have also changed. You can find more details in the [runner tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md)
+of [MMEngine](https://github.com/open-mmlab/mmengine/).
+
+### Runtime settings
+
+Changes in **`checkpoint_config`** and **`log_config`**:
+
+The `checkpoint_config` is moved to `default_hooks.checkpoint` and the `log_config` is moved to `default_hooks.logger`.
+We also moved many hooks settings from the script code to the `default_hooks` field in the runtime configuration.
+
+```python
+default_hooks = dict(
+    # record the time spent on every iteration.
+ timer=dict(type='IterTimerHook'),
+
+ # print log every 50 iterations.
+ logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
+
+ # enable the parameter scheduler.
+ param_scheduler=dict(type='ParamSchedulerHook'),
+
+ # save checkpoint every 2000 iterations.
+ checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000),
+
+ # set sampler seed in distributed environment.
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+
+ # validation results visualization.
+ visualization=dict(type='SegVisualizationHook'))
+```
+
+In addition, we split the original logger into logger and visualizer. The logger is used to record
+information and the visualizer is used to display it in different backends, like the terminal and TensorBoard.
+
+**Original:**
+
+```python
+log_config = dict(
+    interval=100,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook'),
+    ])
+```
+
+**New:**
+
+```python
+default_hooks = dict(
+    ...
+    logger=dict(type='LoggerHook', interval=100),
+)
+vis_backends = [dict(type='LocalVisBackend'),
+                dict(type='TensorboardVisBackend')]
+visualizer = dict(
+    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+```
+
+Changes in **`load_from`** and **`resume_from`**:
+
+- The `resume_from` is removed; we use `resume` and `load_from` together to replace it.
+ - If `resume=True` and `load_from` is **not None**, resume training from the checkpoint in `load_from`.
+ - If `resume=True` and `load_from` is **None**, try to resume from the latest checkpoint in the work directory.
+ - If `resume=False` and `load_from` is **not None**, only load the checkpoint, not resume training.
+ - If `resume=False` and `load_from` is **None**, do not load nor resume.
+
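+For instance, to resume from the latest checkpoint in the work directory, the new config is simply:
+
+```python
+resume = True     # try to resume
+load_from = None  # no explicit checkpoint given, so the latest one in work_dir is used
+```
+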
+Changes in **`dist_params`**: The `dist_params` field is now a sub-field of `env_cfg`, and there are some new
+configurations in `env_cfg`.
+
+```python
+env_cfg = dict(
+ # whether to enable cudnn benchmark
+ cudnn_benchmark=False,
+
+ # set multi process parameters
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+
+ # set distributed parameters
+ dist_cfg=dict(backend='nccl'),
+)
+```
+
+Changes in **`workflow`**: `workflow`-related functionalities are removed.
+
+New field **`visualizer`**: The visualizer is a new design in the OpenMMLab 2.0 architecture. We use a
+visualizer instance in the runner to handle results and log visualization and to save them to different backends.
+See the [visualization tutorial](../user_guides/visualization.md) for more details.
+
+New field **`default_scope`**: The start point to search module for all registries. The `default_scope` in MMSegmentation is `mmseg`. See [the registry tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/registry.md) for more details.
diff --git a/docs/en/migration/package.md b/docs/en/migration/package.md
new file mode 100644
index 000000000..ca24df588
--- /dev/null
+++ b/docs/en/migration/package.md
@@ -0,0 +1,114 @@
+# Package structures changes
+
+This section is included if you are curious about what has changed between MMSeg 0.x and 1.x.
+
+| MMSegmentation 0.x | MMSegmentation 1.x |
+| :----------------- | :----------------- |
+| mmseg.apis         | mmseg.apis         |
+| - mmseg.core       | + mmseg.engine     |
+| mmseg.datasets     | mmseg.datasets     |
+| mmseg.models       | mmseg.models       |
+| - mmseg.ops        | + mmseg.structures |
+| mmseg.utils        | mmseg.utils        |
+|                    | + mmseg.evaluation |
+|                    | + mmseg.registry   |
+
+## Removed packages
+
+### `mmseg.core`
+
+In OpenMMLab 2.0, the `core` package has been removed. The `hooks` and `optimizers` of `core` are moved to `mmseg.engine`, and the `evaluation` of `core` is now `mmseg.evaluation`.
+
+### `mmseg.ops`
+
+The `ops` package included `encoding` and `wrappers`, which are moved to `mmseg.models.utils`.
+
+## Added packages
+
+### `mmseg.engine`
+
+OpenMMLab 2.0 adds a new foundational library for training deep learning models, MMEngine. It serves as the training engine of all OpenMMLab codebases.
+The `engine` package of mmseg contains customized modules for the semantic segmentation task, like `SegVisualizationHook`, which visualizes segmentation masks.
+
+### `mmseg.structures`
+
+In OpenMMLab 2.0, we designed data structures for computer vision tasks, and in mmseg, we implement `SegDataSample` in the `structures` package.
+
+### `mmseg.evaluation`
+
+We moved all evaluation metrics to `mmseg.evaluation`.
+
+### `mmseg.registry`
+
+We moved the registry implementations for all kinds of modules in MMSegmentation to `mmseg.registry`.
+
+## Modified packages
+
+### `mmseg.apis`
+
+OpenMMLab 2.0 tries to support a unified interface for multitasking in computer vision
+and releases a much stronger [`Runner`](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md),
+so MMSeg 1.x removed the modules in `train.py` and `test.py`, renamed `init_segmentor` to `init_model`, and renamed `inference_segmentor` to `inference_model`.
+Here are the changes to `mmseg.apis`:
+
+| Function | Changes |
+| :-------------------: | :---------------------------------------------- |
+| `init_segmentor` | Renamed to `init_model` |
+| `inference_segmentor` | Renamed to `inference_model` |
+| `show_result_pyplot` | Implemented based on `SegLocalVisualizer` |
+| `train_model` | Removed, use `runner.train` to train. |
+| `multi_gpu_test` | Removed, use `runner.test` to test. |
+| `single_gpu_test` | Removed, use `runner.test` to test. |
+| `set_random_seed` | Removed, use `mmengine.runner.set_random_seed`. |
+| `init_random_seed` | Removed, use `mmengine.dist.sync_random_seed`. |
+
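+A before/after sketch of the renamed inference APIs; the file paths are illustrative placeholders:
+
+```python
+# MMSeg 0.x
+# from mmseg.apis import init_segmentor, inference_segmentor
+# model = init_segmentor('cfg.py', 'ckpt.pth', device='cuda:0')
+# result = inference_segmentor(model, 'demo.png')
+
+# MMSeg 1.x
+from mmseg.apis import init_model, inference_model
+
+model = init_model('cfg.py', 'ckpt.pth', device='cuda:0')
+result = inference_model(model, 'demo.png')  # returns a SegDataSample
+```
+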
+### `mmseg.datasets`
+
+OpenMMLab 2.0 defines `BaseDataset` as the functional foundation and interface of datasets, and MMSegmentation 1.x follows this protocol by defining `BaseSegDataset`, which inherits from `BaseDataset`. MMCV 2.x collects general data transforms for multiple tasks, e.g. classification, detection and segmentation, so MMSegmentation 1.x uses these data transforms and removes them from mmseg.datasets.
+
+| Packages/Modules      | Changes                                                                                     |
+| :-------------------: | :------------------------------------------------------------------------------------------ |
+| `mmseg.pipelines`     | Renamed to `mmseg.transforms`                                                                 |
+| `mmseg.sampler`       | Moved to `mmengine.dataset.sampler`                                                           |
+| `CustomDataset`       | Renamed to `BaseSegDataset` and inherits from `BaseDataset` in MMEngine                       |
+| `DefaultFormatBundle` | Replaced with `PackSegInputs`                                                                 |
+| `LoadImageFromFile`   | Moved to `mmcv.transforms.LoadImageFromFile`                                                  |
+| `LoadAnnotations`     | Moved to `mmcv.transforms.LoadAnnotations`                                                    |
+| `Resize`              | Moved to `mmcv.transforms` and split into `Resize`, `RandomResize` and `RandomChoiceResize`   |
+| `RandomFlip`          | Moved to `mmcv.transforms.RandomFlip`                                                         |
+| `Pad`                 | Moved to `mmcv.transforms.Pad`                                                                |
+| `Normalize`           | Moved to `mmcv.transforms.Normalize`                                                          |
+| `Compose`             | Moved to `mmcv.transforms.Compose`                                                            |
+| `ImageToTensor`       | Moved to `mmcv.transforms.ImageToTensor`                                                      |
+
+### `mmseg.models`
+
+`models` has not changed much; it just added the `encoding` and `wrappers` from the previous `mmseg.ops`.
diff --git a/docs/en/notes/changelog.md b/docs/en/notes/changelog.md
index cb9583035..9c468ab9e 100644
--- a/docs/en/notes/changelog.md
+++ b/docs/en/notes/changelog.md
@@ -1,5 +1,54 @@
# Changelog of v1.x
+## v1.0.0rc1 (2/11/2022)
+
+### Highlights
+
+- Support PoolFormer ([#2191](https://github.com/open-mmlab/mmsegmentation/pull/2191))
+- Add Decathlon dataset ([#2227](https://github.com/open-mmlab/mmsegmentation/pull/2227))
+
+### Features
+
+- Add BioMedical data loading ([#2176](https://github.com/open-mmlab/mmsegmentation/pull/2176))
+- Add LIP dataset ([#2251](https://github.com/open-mmlab/mmsegmentation/pull/2251))
+- Add `GenerateEdge` data transform ([#2210](https://github.com/open-mmlab/mmsegmentation/pull/2210))
+
+### Bug fix
+
+- Fix segmenter-vit-s_fcn config ([#2037](https://github.com/open-mmlab/mmsegmentation/pull/2037))
+- Fix binary segmentation ([#2101](https://github.com/open-mmlab/mmsegmentation/pull/2101))
+- Fix MMSegmentation colab demo ([#2089](https://github.com/open-mmlab/mmsegmentation/pull/2089))
+- Fix ResizeToMultiple transform ([#2185](https://github.com/open-mmlab/mmsegmentation/pull/2185))
+- Use SyncBN in mobilenet_v2 ([#2198](https://github.com/open-mmlab/mmsegmentation/pull/2198))
+- Fix typo in installation ([#2175](https://github.com/open-mmlab/mmsegmentation/pull/2175))
+- Fix typo in visualization.md ([#2116](https://github.com/open-mmlab/mmsegmentation/pull/2116))
+
+### Enhancement
+
+- Add mim extras_requires in setup.py ([#2012](https://github.com/open-mmlab/mmsegmentation/pull/2012))
+- Fix CI ([#2029](https://github.com/open-mmlab/mmsegmentation/pull/2029))
+- Remove ops module ([#2063](https://github.com/open-mmlab/mmsegmentation/pull/2063))
+- Add pyupgrade pre-commit hook ([#2078](https://github.com/open-mmlab/mmsegmentation/pull/2078))
+- Add `out_file` in `add_datasample` of `SegLocalVisualizer` to directly save image ([#2090](https://github.com/open-mmlab/mmsegmentation/pull/2090))
+- Upgrade pre commit hooks ([#2154](https://github.com/open-mmlab/mmsegmentation/pull/2154))
+- Ignore test timm in CI when torch\<1.7 ([#2158](https://github.com/open-mmlab/mmsegmentation/pull/2158))
+- Update requirements ([#2186](https://github.com/open-mmlab/mmsegmentation/pull/2186))
+- Fix Windows platform CI ([#2202](https://github.com/open-mmlab/mmsegmentation/pull/2202))
+
+### Documentation
+
+- Add `Overview` documentation ([#2042](https://github.com/open-mmlab/mmsegmentation/pull/2042))
+- Add `Evaluation` documentation ([#2077](https://github.com/open-mmlab/mmsegmentation/pull/2077))
+- Add `Migration` documentation ([#2066](https://github.com/open-mmlab/mmsegmentation/pull/2066))
+- Add `Structures` documentation ([#2070](https://github.com/open-mmlab/mmsegmentation/pull/2070))
+- Add `Structures` ZN documentation ([#2129](https://github.com/open-mmlab/mmsegmentation/pull/2129))
+- Add `Engine` ZN documentation ([#2157](https://github.com/open-mmlab/mmsegmentation/pull/2157))
+- Update `Prepare datasets` and `Visualization` doc ([#2054](https://github.com/open-mmlab/mmsegmentation/pull/2054))
+- Update `Models` documentation ([#2160](https://github.com/open-mmlab/mmsegmentation/pull/2160))
+- Update `Add New Modules` documentation ([#2067](https://github.com/open-mmlab/mmsegmentation/pull/2067))
+- Fix the installation commands in get_started.md ([#2174](https://github.com/open-mmlab/mmsegmentation/pull/2174))
+- Add MMYOLO to README.md ([#2220](https://github.com/open-mmlab/mmsegmentation/pull/2220))
+
## v1.0.0rc0 (31/8/2022)
We are excited to announce the release of MMSegmentation 1.0.0rc0.
diff --git a/docs/en/notes/changelog_v0.x.md b/docs/en/notes/changelog_v0.x.md
index dc94fbdf2..d347a444d 100644
--- a/docs/en/notes/changelog_v0.x.md
+++ b/docs/en/notes/changelog_v0.x.md
@@ -15,9 +15,9 @@
**New Features**
-- Support MAE: Masked Autoencoders Are Scalable Vision Learners ([1307](https://github.com/open-mmlab/mmsegmentation/pull/1307), [1523](https://github.com/open-mmlab/mmsegmentation/pull/1523))
-- Support Resnet strikes back ([1390](https://github.com/open-mmlab/mmsegmentation/pull/1390))
-- Support extra dataloader settings in configs ([1435](https://github.com/open-mmlab/mmsegmentation/pull/1435))
+- Support MAE: Masked Autoencoders Are Scalable Vision Learners ([#1307](https://github.com/open-mmlab/mmsegmentation/pull/1307), [#1523](https://github.com/open-mmlab/mmsegmentation/pull/1523))
+- Support Resnet strikes back ([#1390](https://github.com/open-mmlab/mmsegmentation/pull/1390))
+- Support extra dataloader settings in configs ([#1435](https://github.com/open-mmlab/mmsegmentation/pull/1435))
**Bug Fixes**
diff --git a/docs/en/notes/faq.md b/docs/en/notes/faq.md
index 1c5f46cd7..1efb481d8 100644
--- a/docs/en/notes/faq.md
+++ b/docs/en/notes/faq.md
@@ -8,7 +8,8 @@ The compatible MMSegmentation and MMCV versions are as below. Please install the
| MMSegmentation version | MMCV version | MMClassification version |
| :--------------------: | :-------------------------: | :----------------------: |
-| 1.0.0rc0 | mmcv-full >= 2.0.0rc1 | mmcls>=1.0.0rc0 |
+| 1.0.0rc1 | mmcv >= 2.0.0rc1 | mmcls>=1.0.0rc0 |
+| 1.0.0rc0 | mmcv >= 2.0.0rc1 | mmcls>=1.0.0rc0 |
| master | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 |
| 0.24.1 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 |
| 0.23.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 |
diff --git a/docs/en/overview.md b/docs/en/overview.md
index 07dd0c5c7..399f343fd 100644
--- a/docs/en/overview.md
+++ b/docs/en/overview.md
@@ -1 +1,85 @@
# Overview
+
+This chapter introduces you to the framework of MMSegmentation, and the basic concepts of semantic segmentation. It also provides links to detailed tutorials about MMSegmentation.
+
+## What is semantic segmentation?
+
+Semantic segmentation is the task of clustering parts of an image together that belong to the same object class.
+It is a form of pixel-level prediction because each pixel in an image is classified according to a category.
+Some example benchmarks for this task are [Cityscapes](https://www.cityscapes-dataset.com/benchmarks/), [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/) and [ADE20K](https://groups.csail.mit.edu/vision/datasets/ADE20K/).
+Models are usually evaluated with the Mean Intersection-Over-Union (Mean IoU) and Pixel Accuracy metrics.
+
+## What is MMSegmentation?
+
+MMSegmentation is a toolbox that provides a framework for unified implementation and evaluation of semantic segmentation methods,
+and contains high-quality implementations of popular semantic segmentation methods and datasets.
+
+MMSeg consists of 7 main parts including apis, structures, datasets, models, engine, evaluation and visualization.
+
+- **apis** provides high-level APIs for model inference.
+
+- **structures** provides segmentation data structure `SegDataSample`.
+
+- **datasets** supports various datasets for semantic segmentation.
+
+ - **transforms** contains a lot of useful data augmentation transforms.
+
+- **models** is the most vital part for segmentors and contains different components of a segmentor.
+
+ - **segmentors** defines all of the segmentation model classes.
+ - **data_preprocessors** works for preprocessing the input data of the model.
+ - **backbones** contains various backbone networks that transform an image to feature maps.
+ - **necks** contains various neck components that connect the backbone and heads.
+ - **decode_heads** contains various head components that take feature map as input and predict segmentation results.
+ - **losses** contains various loss functions.
+
+- **engine** is the part for runtime components that extends the functionality of [MMEngine](https://github.com/open-mmlab/mmengine).
+
+ - **optimizers** provides optimizers and optimizer wrappers.
+ - **hooks** provides various hooks of the runner.
+
+- **evaluation** provides different metrics for evaluating model performance.
+
+- **visualization** is for visualizing segmentation results.
+
+## How to use this documentation
+
+Here is a detailed step-by-step guide to learn more about MMSegmentation:
+
+1. For installation instructions, please see [get_started](get_started.md).
+
+2. For beginners, MMSegmentation is the best place to start the journey of semantic segmentation,
+   as there are many SOTA and classic segmentation [models](model_zoo.md),
+   and it is easy to carry out a segmentation task by plugging together building blocks and convenient high-level APIs.
+   Refer to the tutorials below for the basic usage of MMSegmentation:
+
+ - [Config](user_guides/1_config.md)
+ - [Dataset Preparation](user_guides/2_dataset_prepare.md)
+ - [Inference](user_guides/3_inference.md)
+ - [Train and Test](user_guides/4_train_test.md)
+
+3. If you would like to learn about the fundamental classes and features that make MMSegmentation work,
+ please refer to the tutorials below to dive deeper:
+
+ - [Data flow](advanced_guides/data_flow.md)
+ - [Structures](advanced_guides/structures.md)
+ - [Models](advanced_guides/models.md)
+ - [Datasets](advanced_guides/datasets.md)
+ - [Evaluation](advanced_guides/evaluation.md)
+
+4. MMSegmentation also provides tutorials for customization and advanced research;
+   please refer to the guides below to build your own segmentation project:
+
+ - [Add new models](advanced_guides/add_models.md)
+ - [Add new datasets](advanced_guides/add_dataset.md)
+ - [Add new transforms](advanced_guides/add_transform.md)
+ - [Customize runtime](advanced_guides/customize_runtime.md)
+
+5. If you are more familiar with MMSegmentation v0.x, there is documentation about migrating from MMSegmentation v0.x to v1.x:
+
+ - [migration](migration/index.rst)
+
+## References
+
+- https://paperswithcode.com/task/semantic-segmentation/codeless#task-home
diff --git a/docs/en/stat.py b/docs/en/stat.py
index 1398a706f..c458ee3c1 100755
--- a/docs/en/stat.py
+++ b/docs/en/stat.py
@@ -18,13 +18,15 @@ num_ckpts = 0
for f in files:
url = osp.dirname(f.replace('../../', url_prefix))
- with open(f, 'r') as content_file:
+ with open(f) as content_file:
content = content_file.read()
title = content.split('\n')[0].replace('#', '').strip()
- ckpts = set(x.lower().strip()
- for x in re.findall(r'https?://download.*\.pth', content)
- if 'mmsegmentation' in x)
+ ckpts = {
+ x.lower().strip()
+ for x in re.findall(r'https?://download.*\.pth', content)
+ if 'mmsegmentation' in x
+ }
if len(ckpts) == 0:
continue
@@ -34,7 +36,7 @@ for f in files:
assert len(_papertype) > 0
papertype = _papertype[0]
- paper = set([(papertype, title)])
+ paper = {(papertype, title)}
titles.append(title)
num_ckpts += len(ckpts)
diff --git a/docs/en/user_guides/1_config.md b/docs/en/user_guides/1_config.md
index 4beb276b1..aee41e51e 100644
--- a/docs/en/user_guides/1_config.md
+++ b/docs/en/user_guides/1_config.md
@@ -112,7 +112,7 @@ model = dict(
loss_weight=0.4)), # Loss weight of auxiliary_head.
# model training and testing settings
train_cfg=dict(), # train_cfg is just a place holder for now.
- test_cfg=dict(mode='whole')) # The test mode, options are 'whole' and 'sliding'. 'whole': whole image fully-convolutional test. 'sliding': sliding crop window on the image.
+ test_cfg=dict(mode='whole')) # The test mode, options are 'whole' and 'slide'. 'whole': whole image fully-convolutional test. 'slide': sliding crop window on the image.
```
`_base_/datasets/cityscapes.py` is the configuration file of the dataset
diff --git a/docs/en/user_guides/2_dataset_prepare.md b/docs/en/user_guides/2_dataset_prepare.md
index 07f814179..a795e3bfc 100644
--- a/docs/en/user_guides/2_dataset_prepare.md
+++ b/docs/en/user_guides/2_dataset_prepare.md
@@ -145,7 +145,7 @@ mmsegmentation
The data could be found [here](https://www.cityscapes-dataset.com/downloads/) after registration.
By convention, `**labelTrainIds.png` are used for cityscapes training.
-We provided a [scripts](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/dataset_converters/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts)
+We provide a [script](https://github.com/open-mmlab/mmsegmentation/blob/1.x/tools/dataset_converters/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts)
to generate `**labelTrainIds.png`.
```shell
@@ -351,7 +351,8 @@ The dataset is a Large-scale Dataset for Instance Segmentation (also have segman
You may need to follow the following structure for dataset preparation after downloading iSAID dataset.
-```
+```none
+├── data
│ ├── iSAID
│ │ ├── train
│ │ │ ├── images
@@ -376,3 +377,40 @@ python tools/dataset_converters/isaid.py /path/to/iSAID
```
In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation.
+
+## LIP (Look Into Person) dataset
+
+This dataset can be downloaded from [this page](https://lip.sysuhcp.com/overview.php).
+
+Please run the following commands to unzip the dataset:
+
+```shell
+unzip LIP.zip
+cd LIP
+unzip TrainVal_images.zip
+unzip TrainVal_parsing_annotations.zip
+cd TrainVal_parsing_annotations
+unzip TrainVal_parsing_annotations.zip
+mv train_segmentations ../
+mv val_segmentations ../
+cd ..
+```
+
+The contents of LIP datasets include:
+
+```none
+├── data
+│ ├── LIP
+│ │ ├── train_images
+│ │ │ ├── 1000_1234574.jpg
+│ │ │ ├── ...
+│ │ ├── train_segmentations
+│ │ │ ├── 1000_1234574.png
+│ │ │ ├── ...
+│ │ ├── val_images
+│ │ │ ├── 100034_483681.jpg
+│ │ │ ├── ...
+│ │ ├── val_segmentations
+│ │ │ ├── 100034_483681.png
+│ │ │ ├── ...
+```
diff --git a/docs/en/user_guides/visualization.md b/docs/en/user_guides/visualization.md
index 2780de136..e7c3359cc 100644
--- a/docs/en/user_guides/visualization.md
+++ b/docs/en/user_guides/visualization.md
@@ -38,7 +38,7 @@ Find the `vis_data` path of `work_dir` after starting training, for example, the
work_dirs/test_visual/20220810_115248/vis_data
```
-The scalar file in vis_data path includes learning rate, losses and data_time etc, also record metrics results and you can refer [logging tutorial](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/logging.html) in mmengine to log custom data. The tensorboard visualization results are executed with the following command:
+The scalar file in the vis_data path includes the learning rate, losses, data_time, etc., and also records metrics results. You can refer to the [logging tutorial](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/logging.html) in MMEngine to log custom data. The TensorBoard visualization results are launched with the following command:
```shell
tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data
@@ -46,9 +46,11 @@ tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data
## Data and Results visualization
-MMSegmentation provides `SegVisualizationHook` that can render segmentation masks of ground truth and prediction. Users can modify `default_hooks` at each `schedule_x.py` config file.
+### Visualize Data Samples during Model Testing or Validation
-For exsample, In `_base_/schedules/schedule_20k.py`, modify the `SegVisualizationHook` configuration, set `draw` to `True` to enable the storage of network inference results, `interval` indicates the sampling interval of the prediction results, and when set to 1, each inference result of the network will be saved. `interval` is set to 50 by default:
+MMSegmentation provides `SegVisualizationHook`, a [hook](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/hook.md) that visualizes the ground truth and predictions of segmentation during model testing and evaluation. Its configuration is in `default_hooks`; please see the [Runner tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/runner.md) for more details.
+
+For example, In `_base_/schedules/schedule_20k.py`, modify the `SegVisualizationHook` configuration, set `draw` to `True` to enable the storage of network inference results, `interval` indicates the sampling interval of the prediction results, and when set to 1, each inference result of the network will be saved. `interval` is set to 50 by default:
```python
default_hooks = dict(
@@ -76,4 +78,97 @@ we can also run the following command to view them in TensorBoard:
tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data
```
-If you would like to know more visualization usage, you can refer to [visualization tutorial](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/visualization.html) in mmengie.
+### Visualize a Single Data Sample
+
+If you want to visualize a single data sample, we suggest using `SegLocalVisualizer`.
+
+`SegLocalVisualizer` is a child class of `Visualizer` in MMEngine that works for MMSegmentation visualization; for more details about `Visualizer`, please refer to the [visualization tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/visualization.md) in MMEngine.
+
+Here is an example of `SegLocalVisualizer`. First, download the example data with the following commands:
+
+*(figure: the example image and its ground-truth annotation)*
+
+```shell
+wget https://user-images.githubusercontent.com/24582831/189833109-eddad58f-f777-4fc0-b98a-6bd429143b06.png --output-document aachen_000000_000019_leftImg8bit.png
+wget https://user-images.githubusercontent.com/24582831/189833143-15f60f8a-4d1e-4cbb-a6e7-5e2233869fac.png --output-document aachen_000000_000019_gtFine_labelTrainIds.png
+```
+
+Then you can find their local path and use the scripts below to visualize:
+
+```python
+import mmcv
+import os.path as osp
+import torch
+# `PixelData` is data structure for pixel-level annotations or predictions defined in MMEngine.
+# Please refer to below tutorial file of data structures in MMEngine:
+# https://github.com/open-mmlab/mmengine/tree/main/docs/en/advanced_tutorials/data_element.md
+
+from mmengine.structures import PixelData
+
+# `SegDataSample` is data structure interface between different components
+# defined in MMSegmentation, it includes ground truth, prediction and
+# predicted logits of semantic segmentation.
+# Please refer to below tutorial file of `SegDataSample` for more details:
+# https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/advanced_guides/structures.md
+
+from mmseg.structures import SegDataSample
+from mmseg.visualization import SegLocalVisualizer
+
+out_file = 'out_file_cityscapes'
+save_dir = './work_dirs'
+
+image = mmcv.imread(
+ osp.join(
+ osp.dirname(__file__),
+ './aachen_000000_000019_leftImg8bit.png'
+ ),
+ 'color')
+sem_seg = mmcv.imread(
+ osp.join(
+ osp.dirname(__file__),
+ './aachen_000000_000019_gtFine_labelTrainIds.png' # noqa
+ ),
+ 'unchanged')
+sem_seg = torch.from_numpy(sem_seg)
+gt_sem_seg_data = dict(data=sem_seg)
+gt_sem_seg = PixelData(**gt_sem_seg_data)
+data_sample = SegDataSample()
+data_sample.gt_sem_seg = gt_sem_seg
+
+seg_local_visualizer = SegLocalVisualizer(
+ vis_backends=[dict(type='LocalVisBackend')],
+ save_dir=save_dir)
+
+# The meta information of dataset usually includes `classes` for class names and
+# `palette` for visualization color of each foreground.
+# All class names and palettes are defined in the file:
+# https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/utils/class_names.py
+
+seg_local_visualizer.dataset_meta = dict(
+ classes=('road', 'sidewalk', 'building', 'wall', 'fence',
+ 'pole', 'traffic light', 'traffic sign',
+ 'vegetation', 'terrain', 'sky', 'person', 'rider',
+ 'car', 'truck', 'bus', 'train', 'motorcycle',
+ 'bicycle'),
+ palette=[[128, 64, 128], [244, 35, 232], [70, 70, 70],
+ [102, 102, 156], [190, 153, 153], [153, 153, 153],
+ [250, 170, 30], [220, 220, 0], [107, 142, 35],
+ [152, 251, 152], [70, 130, 180], [220, 20, 60],
+ [255, 0, 0], [0, 0, 142], [0, 0, 70],
+ [0, 60, 100], [0, 80, 100], [0, 0, 230],
+ [119, 11, 32]])
+# When `show=True`, the results are shown directly;
+# with `show=False`, the results are saved in the local directory folder.
+seg_local_visualizer.add_datasample(out_file, image,
+ data_sample, show=False)
+```
+
+Then the visualization result of the image blended with its corresponding ground truth can be found in `./work_dirs/vis_data/vis_image/`, named `out_file_cityscapes_0.png`:
+
+*(figure: out_file_cityscapes_0.png, the image blended with its ground-truth mask)*
+
+If you would like to know more visualization usage, you can refer to [visualization tutorial](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/visualization.html) in MMEngine.
diff --git a/docs/zh_cn/advanced_guides/engine.md b/docs/zh_cn/advanced_guides/engine.md
index de2390530..f67a78759 100644
--- a/docs/zh_cn/advanced_guides/engine.md
+++ b/docs/zh_cn/advanced_guides/engine.md
@@ -1 +1,163 @@
# Training Engine
+
+## Hook
+
+### Introduction
+
+OpenMMLab abstracts the model training and testing process as a `Runner`. Inserting hooks implements the functionality needed at the different training and testing stages of the `Runner` (such as "before/after each training iter", "before/after each validation iter", etc.). For more introduction of the hook mechanism, please refer to [here](https://www.calltutors.com/blog/what-is-hook).
+
+Hooks used in the `Runner` fall into two categories:
+
+- default hooks
+
+They implement essential training-time functionality, are defined in the config file via `default_hooks` and passed to the `Runner`, which registers them through the [`register_default_hooks`](https://github.com/open-mmlab/mmengine/blob/090104df21acd05a8aadae5a0d743a7da3314f6f/mmengine/runner/runner.py#L1780) method.
+Hooks have corresponding priorities; the higher the priority, the earlier the hook is called by the runner. If the priorities are equal, the calling order follows the registration order.
+It is not recommended for users to modify the priorities of the default hooks; please refer to the [mmengine hooks documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/hook.md) to learn how hook priorities are defined.
+The following are the default hooks used in MMSegmentation:
+
+| Hook | Usage | Priority |
+| :---: | :---: | :---: |
+| [IterTimerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/iter_timer_hook.py) | Record the time spent on each iteration. | NORMAL (50) |
+| [LoggerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/logger_hook.py) | Collect log records from different components of the `Runner` and output them to the terminal, JSON files, tensorboard, wandb and other downstream backends. | BELOW_NORMAL (60) |
+| [ParamSchedulerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/param_scheduler_hook.py) | Update some hyper-parameters in the optimizer, e.g. the momentum of the learning rate. | LOW (70) |
+| [CheckpointHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py) | Save checkpoint files periodically. | VERY_LOW (90) |
+| [DistSamplerSeedHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sampler_seed_hook.py) | Ensure the distributed sampler's shuffle is enabled. | NORMAL (50) |
+| [SegVisualizationHook](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/visualization/local_visualizer.py) | Visualize prediction results during validation and testing. | NORMAL (50) |
+
+They are configured in the config file as follows:
+
+```python
+default_hooks = dict(
+ timer=dict(type='IterTimerHook'),
+ logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
+ param_scheduler=dict(type='ParamSchedulerHook'),
+ checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000),
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+ visualization=dict(type='SegVisualizationHook'))
+```
+
+All of the default hooks above, except `SegVisualizationHook`, are implemented in MMEngine; `SegVisualizationHook` is implemented in MMSegmentation and will be introduced later.
+
+- custom hooks
+
+Custom hooks are defined in the config via `custom_hooks`, and the `Runner` registers them through the [`register_custom_hooks`](https://github.com/open-mmlab/mmengine/blob/090104df21acd05a8aadae5a0d743a7da3314f6f/mmengine/runner/runner.py#L1852) method.
+The priority of a custom hook needs to be set in the config file; otherwise it defaults to `NORMAL`. The following are some of the custom hooks implemented in MMEngine:
+
+| Hook | Usage |
+| :---: | :---: |
+| [EMAHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/ema_hook.py) | Apply Exponential Moving Average (EMA) during model training. |
+| [EmptyCacheHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/empty_cache_hook.py) | Release all GPU memory not occupied by the cache during training. |
+| [SyncBuffersHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sync_buffer_hook.py) | Synchronize the parameters in the model buffer, e.g. `running_mean` and `running_var` of BN, at the end of each training epoch. |
+
+The following is a usage example of `EMAHook`; in the config file, the configuration of an implemented custom hook is added as a member of the `custom_hooks` list.
+
+```python
+custom_hooks = [
+ dict(type='EMAHook', start_iters=500, priority='NORMAL')
+]
+```
+
+### SegVisualizationHook
+
+MMSegmentation implements [`SegVisualizationHook`](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/engine/hooks/visualization_hook.py#L17) to visualize prediction results during validation and testing.
+`SegVisualizationHook` overrides the `_after_iter` method of the base class `Hook`. During validation or testing, it calls the `add_datasample` method of `visualizer` at the specified iteration interval to draw the semantic segmentation results. The implementation is as follows:
+
+```python
+...
+@HOOKS.register_module()
+class SegVisualizationHook(Hook):
+...
+    def _after_iter(self,
+                    runner: Runner,
+                    batch_idx: int,
+                    data_batch: dict,
+                    outputs: Sequence[SegDataSample],
+                    mode: str = 'val') -> None:
+...
+        # exit immediately if in the training stage or if self.draw is False
+        if self.draw is False or mode == 'train':
+            return
+...
+        if self.every_n_inner_iters(batch_idx, self.interval):
+            for output in outputs:
+                img_path = output.img_path
+                img_bytes = self.file_client.get(img_path)
+                img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+                window_name = f'{mode}_{osp.basename(img_path)}'
+
+                self._visualizer.add_datasample(
+                    window_name,
+                    img,
+                    data_sample=output,
+                    show=self.show,
+                    wait_time=self.wait_time,
+                    step=runner.iter)
+
+```
+
+For more details about visualization, please see [here](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/visualization.md).
+
+## Optimizer
+
+### Optimizer wrapper
+
+OpenMMLab 2.0 designs an optimizer wrapper that supports different training strategies, including mixed-precision training, gradient accumulation and gradient clipping; users can choose the appropriate training strategy according to their needs.
+The optimizer wrapper also defines a standard parameter-update process, based on which users can switch between different training strategies within the same code. For more information, please refer to the [MMEngine optimizer wrapper documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/optim_wrapper.md).
+
+MMSegmentation also uses the optimizer wrapper to optimize parameters when training models; the following are common usages in MMSegmentation:
+
+#### Configuring PyTorch-supported optimizers
+
+OpenMMLab 2.0 supports all native PyTorch optimizers; see [here](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/optim_wrapper.md#%E7%AE%80%E5%8D%95%E9%85%8D%E7%BD%AE).
+To set the optimizer used by the `Runner` during training, define `optim_wrapper` in the config file; for example, to use the SGD optimizer:
+
+```python
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
+ clip_grad=None)
+```
+
+#### The `paramwise_cfg` argument
+
+During model training, if you want to set different optimization strategies for different parameters in the optimizer, e.g. different learning rates or weight decays, you can do so via `paramwise_cfg`.
+
+For example, when training with ViT as the model backbone, weight decay is set in the optimizer, but it needs to be turned off for the position embedding, layer normalization and class token parameters. The `optim_wrapper` configuration is [as follows](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py#L15-L27):
+
+```python
+optimizer = dict(
+    type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01)
+optim_wrapper = dict(
+ _delete_=True,
+ type='OptimWrapper',
+ optimizer=optimizer,
+ paramwise_cfg=dict(
+ custom_keys={
+ 'pos_embed': dict(decay_mult=0.),
+ 'cls_token': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }))
+```
+
+Here `decay_mult` refers to the weight decay coefficient of the corresponding parameters. For more usage of `paramwise_cfg`, please refer to the [MMEngine documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/optim_wrapper.md).
+
+### 优化器封装构造器
+
+默认的优化器封装构造器 [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/376251961da47ea8254ab808ae5c51e1430f18dc/mmengine/optim/optimizer/default_constructor.py#L19) 根据输入的 `optim_wrapper` 和 `optim_wrapper` 中定义的 `paramwise_cfg` 来构建训练中使用的优化器. 当 [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/376251961da47ea8254ab808ae5c51e1430f18dc/mmengine/optim/optimizer/default_constructor.py#L19) 功能不能满足需求时, 可以自定义优化器封装构造器来实现超参数的配置.
+
+MMSegmentation implements [`LearningRateDecayOptimizerConstructor`](https://github.com/open-mmlab/mmsegmentation/blob/b21df463d47447f33c28d9a4f46136ad64d34a40/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py#L104), which, when training models with ConvNeXt, BEiT or MAE backbones, decays the learning rate of the backbone parameters layer by layer according to the defined decay ratio (`decay_rate`). The configuration in the config file is as follows:
+
+```python
+optim_wrapper = dict(
+ _delete_=True,
+ type='AmpOptimWrapper',
+ optimizer=dict(
+ type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
+ paramwise_cfg={
+ 'decay_rate': 0.9,
+ 'decay_type': 'stage_wise',
+ 'num_layers': 12
+ },
+ constructor='LearningRateDecayOptimizerConstructor',
+ loss_scale='dynamic')
+```
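+
+If neither of the built-in constructors fits, a custom constructor can be registered and then selected through the `constructor` field. The skeleton below is illustrative only; the class name and the overridden method body are assumptions, not MMSegmentation code:
+
+```python
+from mmengine.optim import DefaultOptimWrapperConstructor
+
+from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS
+
+
+@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
+class MyOptimWrapperConstructor(DefaultOptimWrapperConstructor):
+
+    def add_params(self, params, module, **kwargs):
+        # Assign per-parameter options (lr, weight decay, ...) here and
+        # append the resulting parameter groups to `params`.
+        super().add_params(params, module, **kwargs)
+```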
diff --git a/docs/zh_cn/advanced_guides/structures.md b/docs/zh_cn/advanced_guides/structures.md
index d045dd413..958e011a7 100644
--- a/docs/zh_cn/advanced_guides/structures.md
+++ b/docs/zh_cn/advanced_guides/structures.md
@@ -1 +1,102 @@
# Data Structures
+
+To unify the input and output interfaces between the model and its functional modules, OpenMMLab 2.0's MMEngine defines a set of abstract data structures that implement basic add/delete/query/update operations, support data migration between devices, and support dict-like and tensor-like operations such as
+`.cpu()`, `.cuda()`, `.get()` and `.detach()`. See the [MMEngine documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/data_element.md) for details.
+
+Likewise, MMSegmentation follows the interface protocols between OpenMMLab 2.0 modules and defines `SegDataSample` to encapsulate the data required by semantic segmentation tasks.
+
+## Semantic segmentation data SegDataSample
+
+[SegDataSample](mmseg.structures.SegDataSample) includes three main data fields, `gt_sem_seg`, `pred_sem_seg` and `seg_logits`, which store the ground-truth annotation, the prediction result and the un-normalized predicted logits, respectively.
+
+| Field        | Type                      | Description                                  |
+| ------------ | ------------------------- | -------------------------------------------- |
+| gt_sem_seg   | [`PixelData`](#pixeldata) | Ground-truth annotation of the image.        |
+| pred_sem_seg | [`PixelData`](#pixeldata) | Prediction result of the image.              |
+| seg_logits   | [`PixelData`](#pixeldata) | Un-normalized logits predicted by the model. |
+
+The following sample code shows how to use `SegDataSample`:
+
+```python
+import torch
+from mmengine.structures import PixelData
+from mmseg.structures import SegDataSample
+
+img_meta = dict(img_shape=(4, 4, 3),
+ pad_shape=(4, 4, 3))
+data_sample = SegDataSample()
+# Define gt_segmentations to encapsulate the ground-truth annotation data
+gt_segmentations = PixelData(metainfo=img_meta)
+gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
+
+# Add and process attributes in SegDataSample
+data_sample.gt_sem_seg = gt_segmentations
+assert 'gt_sem_seg' in data_sample
+assert 'data' in data_sample.gt_sem_seg
+assert 'img_shape' in data_sample.gt_sem_seg.metainfo_keys()
+print(data_sample.gt_sem_seg.shape)
+'''
+(4, 4)
+'''
+print(data_sample)
+'''
+<SegDataSample(
+    ...
+    gt_sem_seg: <PixelData(...)>
+) at 0x1c2aae44d60>
+'''
+
+# Delete and modify attributes in SegDataSample
+data_sample = SegDataSample()
+gt_segmentations = PixelData(metainfo=img_meta)
+gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
+data_sample.gt_sem_seg = gt_segmentations
+data_sample.gt_sem_seg.set_metainfo(dict(img_shape=(4, 4, 9), pad_shape=(4, 4, 9)))
+del data_sample.gt_sem_seg.img_shape
+
+# Tensor-like operations
+data_sample = SegDataSample()
+gt_segmentations = PixelData(metainfo=img_meta)
+gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
+cuda_gt_segmentations = gt_segmentations.cuda()
+cuda_gt_segmentations = gt_segmentations.to('cuda:0')
+cpu_gt_segmentations = cuda_gt_segmentations.cpu()
+cpu_gt_segmentations = cuda_gt_segmentations.to('cpu')
+```
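+
+Following the same pattern, the prediction result and the logits produced by a model can be attached to the same sample. This is a short illustrative continuation of the example above:
+
+```python
+# Encapsulate model outputs into the same data sample
+pred_sem_seg = PixelData(metainfo=img_meta)
+pred_sem_seg.data = torch.randint(0, 2, (1, 4, 4))
+data_sample.pred_sem_seg = pred_sem_seg
+
+seg_logits = PixelData(metainfo=img_meta)
+seg_logits.data = torch.rand(2, 4, 4)  # un-normalized scores for 2 classes
+data_sample.seg_logits = seg_logits
+```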
+
+## Customize new attributes in SegDataSample
+
+If you want to customize new attributes in `SegDataSample`, you may refer to the following [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) example:
+
+```python
+class SegDataSample(BaseDataElement):
+ ...
+
+ @property
+ def xxx_property(self) -> xxxData:
+ return self._xxx_property
+
+ @xxx_property.setter
+ def xxx_property(self, value: xxxData) -> None:
+ self.set_field(value, '_xxx_property', dtype=xxxData)
+
+ @xxx_property.deleter
+ def xxx_property(self) -> None:
+ del self._xxx_property
+```
+
+In this way, a new attribute `xxx_property` is added to `SegDataSample`.
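+
+Once defined, the new property behaves like the built-in fields. The following usage is hypothetical, assuming `xxxData` is a data element type such as `PixelData`:
+
+```python
+data_sample = SegDataSample()
+data_sample.xxx_property = xxxData(data=torch.rand(1, 4, 4))
+assert 'xxx_property' in data_sample
+del data_sample.xxx_property
+```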
diff --git a/docs/zh_cn/api.rst b/docs/zh_cn/api.rst
index 3da37ce6f..be68c7579 100644
--- a/docs/zh_cn/api.rst
+++ b/docs/zh_cn/api.rst
@@ -75,11 +75,6 @@ necks
.. automodule:: mmseg.models.necks
:members:
-mmseg.ops
---------------
-.. automodule:: mmseg.ops
- :members:
-
mmseg.registry
--------------
.. automodule:: mmseg.registry
diff --git a/docs/zh_cn/conf.py b/docs/zh_cn/conf.py
index 353b0bc72..18420558d 100644
--- a/docs/zh_cn/conf.py
+++ b/docs/zh_cn/conf.py
@@ -28,7 +28,7 @@ version_file = '../../mmseg/version.py'
def get_version():
- with open(version_file, 'r') as f:
+ with open(version_file) as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']
diff --git a/docs/zh_cn/stat.py b/docs/zh_cn/stat.py
index b3a1d7306..7a86302e3 100755
--- a/docs/zh_cn/stat.py
+++ b/docs/zh_cn/stat.py
@@ -18,13 +18,15 @@ num_ckpts = 0
for f in files:
url = osp.dirname(f.replace('../../', url_prefix))
- with open(f, 'r') as content_file:
+ with open(f) as content_file:
content = content_file.read()
title = content.split('\n')[0].replace('#', '').strip()
- ckpts = set(x.lower().strip()
- for x in re.findall(r'https?://download.*\.pth', content)
- if 'mmsegmentation' in x)
+ ckpts = {
+ x.lower().strip()
+ for x in re.findall(r'https?://download.*\.pth', content)
+ if 'mmsegmentation' in x
+ }
if len(ckpts) == 0:
continue
@@ -34,7 +36,7 @@ for f in files:
assert len(_papertype) > 0
papertype = _papertype[0]
- paper = set([(papertype, title)])
+ paper = {(papertype, title)}
titles.append(title)
num_ckpts += len(ckpts)
diff --git a/mmseg/__init__.py b/mmseg/__init__.py
index 7c4aadc59..8a8593bc9 100644
--- a/mmseg/__init__.py
+++ b/mmseg/__init__.py
@@ -10,6 +10,7 @@ from .version import __version__, version_info
MMCV_MIN = '2.0.0rc1'
MMCV_MAX = '2.1.0'
MMENGINE_MIN = '0.1.0'
+MMENGINE_MAX = '1.0.0'
def digit_version(version_str: str, length: int = 4):
@@ -62,10 +63,12 @@ assert (mmcv_min_version <= mmcv_version < mmcv_max_version), \
f'Please install mmcv>={mmcv_min_version}, <{mmcv_max_version}.'
mmengine_min_version = digit_version(MMENGINE_MIN)
+mmengine_max_version = digit_version(MMENGINE_MAX)
mmengine_version = digit_version(mmengine.__version__)
-assert (mmengine_min_version <= mmengine_version), \
+assert (mmengine_min_version <= mmengine_version < mmengine_max_version), \
f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
- f'Please install mmengine>={mmengine_min_version}.'
+ f'Please install mmengine>={mmengine_min_version}, '\
+ f'<{mmengine_max_version}.'
__all__ = ['__version__', 'version_info', 'digit_version']
diff --git a/mmseg/apis/inference.py b/mmseg/apis/inference.py
index 679d40738..9abc85d62 100644
--- a/mmseg/apis/inference.py
+++ b/mmseg/apis/inference.py
@@ -102,7 +102,7 @@ def _preprare_data(imgs: ImageType, model: BaseSegmentor):
is_batch = False
if isinstance(imgs[0], np.ndarray):
- cfg.test_pipeline[0].type = 'LoadImageFromNDArray'
+ cfg.test_pipeline[0]['type'] = 'LoadImageFromNDArray'
# TODO: Consider using the singleton pattern to avoid building
# a pipeline for each inference
@@ -203,9 +203,8 @@ def show_result_pyplot(model: BaseSegmentor,
draw_gt=draw_gt,
draw_pred=draw_pred,
wait_time=wait_time,
+ out_file=out_file,
show=show)
vis_img = visualizer.get_image()
- if out_file is not None:
- mmcv.imwrite(vis_img, out_file)
return vis_img
diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py
index 5cd5a772f..bf506eafa 100644
--- a/mmseg/datasets/__init__.py
+++ b/mmseg/datasets/__init__.py
@@ -6,20 +6,23 @@ from .cityscapes import CityscapesDataset
from .coco_stuff import COCOStuffDataset
from .dark_zurich import DarkZurichDataset
from .dataset_wrappers import MultiImageMixDataset
+from .decathlon import DecathlonDataset
from .drive import DRIVEDataset
from .hrf import HRFDataset
from .isaid import iSAIDDataset
from .isprs import ISPRSDataset
+from .lip import LIPDataset
from .loveda import LoveDADataset
from .night_driving import NightDrivingDataset
from .pascal_context import PascalContextDataset, PascalContextDataset59
from .potsdam import PotsdamDataset
from .stare import STAREDataset
-from .transforms import (CLAHE, AdjustGamma, LoadAnnotations,
- LoadImageFromNDArray, PackSegInputs,
- PhotoMetricDistortion, RandomCrop, RandomCutOut,
- RandomMosaic, RandomRotate, Rerange, ResizeToMultiple,
- RGB2Gray, SegRescale)
+from .transforms import (CLAHE, AdjustGamma, GenerateEdge, LoadAnnotations,
+ LoadBiomedicalAnnotation, LoadBiomedicalData,
+ LoadBiomedicalImageFromFile, LoadImageFromNDArray,
+ PackSegInputs, PhotoMetricDistortion, RandomCrop,
+ RandomCutOut, RandomMosaic, RandomRotate, Rerange,
+ ResizeToMultiple, RGB2Gray, SegRescale)
from .voc import PascalVOCDataset
__all__ = [
@@ -31,5 +34,7 @@ __all__ = [
'LoadAnnotations', 'RandomCrop', 'SegRescale', 'PhotoMetricDistortion',
'RandomRotate', 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray',
'RandomCutOut', 'RandomMosaic', 'PackSegInputs', 'ResizeToMultiple',
- 'LoadImageFromNDArray'
+ 'LoadImageFromNDArray', 'LoadBiomedicalImageFromFile',
+ 'LoadBiomedicalAnnotation', 'LoadBiomedicalData', 'GenerateEdge',
+ 'DecathlonDataset', 'LIPDataset'
]
diff --git a/mmseg/datasets/basesegdataset.py b/mmseg/datasets/basesegdataset.py
index 1d42c130f..4476a1eda 100644
--- a/mmseg/datasets/basesegdataset.py
+++ b/mmseg/datasets/basesegdataset.py
@@ -85,7 +85,7 @@ class BaseSegDataset(BaseDataset):
seg_map_suffix='.png',
metainfo: Optional[dict] = None,
data_root: Optional[str] = None,
- data_prefix: dict = dict(img_path=None, seg_map_path=None),
+ data_prefix: dict = dict(img_path='', seg_map_path=''),
filter_cfg: Optional[dict] = None,
indices: Optional[Union[int, Sequence[int]]] = None,
serialize_data: bool = True,
@@ -132,9 +132,6 @@ class BaseSegDataset(BaseDataset):
# if it is not defined
updated_palette = self._update_palette()
self._metainfo.update(dict(palette=updated_palette))
- if test_mode:
- assert self._metainfo.get('classes') is not None, \
- 'dataset metainfo `classes` should be specified when testing'
# Join paths.
if self.data_root is not None:
@@ -146,6 +143,10 @@ class BaseSegDataset(BaseDataset):
if not lazy_init:
self.full_init()
+ if test_mode:
+ assert self._metainfo.get('classes') is not None, \
+ 'dataset metainfo `classes` should be specified when testing'
+
@classmethod
def get_label_map(cls,
new_classes: Optional[Sequence] = None
diff --git a/mmseg/datasets/decathlon.py b/mmseg/datasets/decathlon.py
new file mode 100644
index 000000000..26aa4ef0d
--- /dev/null
+++ b/mmseg/datasets/decathlon.py
@@ -0,0 +1,96 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os.path as osp
+from typing import List
+
+from mmengine.fileio import load
+
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class DecathlonDataset(BaseSegDataset):
+ """Dataset for Dacathlon dataset.
+
+ The dataset.json format is shown as follows
+
+ .. code-block:: none
+
+ {
+ "name": "BRATS",
+ "tensorImageSize": "4D",
+ "modality":
+ {
+ "0": "FLAIR",
+ "1": "T1w",
+ "2": "t1gd",
+ "3": "T2w"
+ },
+ "labels": {
+ "0": "background",
+ "1": "edema",
+ "2": "non-enhancing tumor",
+ "3": "enhancing tumour"
+ },
+ "numTraining": 484,
+ "numTest": 266,
+ "training":
+ [
+ {
+ "image": "./imagesTr/BRATS_306.nii.gz"
+ "label": "./labelsTr/BRATS_306.nii.gz"
+ ...
+ }
+        ],
+ "test":
+ [
+ "./imagesTs/BRATS_557.nii.gz"
+ ...
+ ]
+ }
+ """
+
+ def load_data_list(self) -> List[dict]:
+ """Load annotation from directory or annotation file.
+
+ Returns:
+ list[dict]: All data info of dataset.
+ """
+ # `self.ann_file` denotes the absolute annotation file path if
+        # `self.data_root=None` or relative path if
+        # `self.data_root=/path/to/data/`.
+ annotations = load(self.ann_file)
+ if not isinstance(annotations, dict):
+ raise TypeError(f'The annotations loaded from annotation file '
+ f'should be a dict, but got {type(annotations)}!')
+ raw_data_list = annotations[
+ 'training'] if not self.test_mode else annotations['test']
+ data_list = []
+ for raw_data_info in raw_data_list:
+            # `[2:]` removes the './' prefix in the file path, which would
+            # otherwise break loading from cloud storage.
+ if isinstance(raw_data_info, dict):
+ data_info = dict(
+                    img_path=osp.join(self.data_root,
+                                      raw_data_info['image'][2:]))
+ data_info['seg_map_path'] = osp.join(
+ self.data_root, raw_data_info['label'][2:])
+ else:
+ data_info = dict(
+                    img_path=osp.join(self.data_root, raw_data_info[2:]))
+ data_info['label_map'] = self.label_map
+ data_info['reduce_zero_label'] = self.reduce_zero_label
+ data_info['seg_fields'] = []
+ data_list.append(data_info)
+ annotations.pop('training')
+ annotations.pop('test')
+
+ metainfo = copy.deepcopy(annotations)
+ metainfo['classes'] = [*metainfo['labels'].values()]
+        # Meta information loaded from the annotation file will not override
+        # the existing meta information from `BaseDataset.METAINFO` and the
+        # `metainfo` argument of the constructor.
+ for k, v in metainfo.items():
+ self._metainfo.setdefault(k, v)
+
+ return data_list
diff --git a/mmseg/datasets/lip.py b/mmseg/datasets/lip.py
new file mode 100644
index 000000000..40a703ffd
--- /dev/null
+++ b/mmseg/datasets/lip.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmseg.registry import DATASETS
+from .basesegdataset import BaseSegDataset
+
+
+@DATASETS.register_module()
+class LIPDataset(BaseSegDataset):
+ """LIP dataset.
+
+ The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to
+ '.png'.
+ """
+ METAINFO = dict(
+ classes=('Background', 'Hat', 'Hair', 'Glove', 'Sunglasses',
+ 'UpperClothes', 'Dress', 'Coat', 'Socks', 'Pants',
+ 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm',
+ 'Right-arm', 'Left-leg', 'Right-leg', 'Left-shoe',
+ 'Right-shoe'),
+ palette=(
+ [0, 0, 0],
+ [128, 0, 0],
+ [255, 0, 0],
+ [0, 85, 0],
+ [170, 0, 51],
+ [255, 85, 0],
+ [0, 0, 85],
+ [0, 119, 221],
+ [85, 85, 0],
+ [0, 85, 85],
+ [85, 51, 0],
+ [52, 86, 128],
+ [0, 128, 0],
+ [0, 0, 255],
+ [51, 170, 221],
+ [0, 255, 255],
+ [85, 255, 170],
+ [170, 255, 85],
+ [255, 255, 0],
+ [255, 170, 0],
+ ))
+
+ def __init__(self, **kwargs) -> None:
+ super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)
diff --git a/mmseg/datasets/transforms/__init__.py b/mmseg/datasets/transforms/__init__.py
index 56fe94142..09f6c655a 100644
--- a/mmseg/datasets/transforms/__init__.py
+++ b/mmseg/datasets/transforms/__init__.py
@@ -1,13 +1,17 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .formatting import PackSegInputs
-from .loading import LoadAnnotations, LoadImageFromNDArray
-from .transforms import (CLAHE, AdjustGamma, PhotoMetricDistortion, RandomCrop,
- RandomCutOut, RandomMosaic, RandomRotate, Rerange,
- ResizeToMultiple, RGB2Gray, SegRescale)
+from .loading import (LoadAnnotations, LoadBiomedicalAnnotation,
+ LoadBiomedicalData, LoadBiomedicalImageFromFile,
+ LoadImageFromNDArray)
+from .transforms import (CLAHE, AdjustGamma, GenerateEdge,
+ PhotoMetricDistortion, RandomCrop, RandomCutOut,
+ RandomMosaic, RandomRotate, Rerange, ResizeToMultiple,
+ RGB2Gray, SegRescale)
__all__ = [
'LoadAnnotations', 'RandomCrop', 'SegRescale', 'PhotoMetricDistortion',
'RandomRotate', 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray',
'RandomCutOut', 'RandomMosaic', 'PackSegInputs', 'ResizeToMultiple',
- 'LoadImageFromNDArray'
+ 'LoadImageFromNDArray', 'LoadBiomedicalImageFromFile',
+ 'LoadBiomedicalAnnotation', 'LoadBiomedicalData', 'GenerateEdge'
]
diff --git a/mmseg/datasets/transforms/loading.py b/mmseg/datasets/transforms/loading.py
index d0589b8b0..ea51e0df5 100644
--- a/mmseg/datasets/transforms/loading.py
+++ b/mmseg/datasets/transforms/loading.py
@@ -1,12 +1,16 @@
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
+from typing import Dict
import mmcv
+import mmengine
import numpy as np
+from mmcv.transforms import BaseTransform
from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations
from mmcv.transforms import LoadImageFromFile
from mmseg.registry import TRANSFORMS
+from mmseg.utils import datafrombytes
@TRANSFORMS.register_module()
@@ -168,3 +172,273 @@ class LoadImageFromNDArray(LoadImageFromFile):
results['img_shape'] = img.shape[:2]
results['ori_shape'] = img.shape[:2]
return results
+
+
+@TRANSFORMS.register_module()
+class LoadBiomedicalImageFromFile(BaseTransform):
+ """Load an biomedical mage from file.
+
+ Required Keys:
+
+ - img_path
+
+ Added Keys:
+
+    - img (np.ndarray): Biomedical image with shape (N, Z, Y, X) by default,
+        where N is the number of modalities. The data type is float32 if
+        ``to_float32=True``, or float64 if decode_backend is 'nifti' and
+        to_float32 is False.
+ - img_shape
+ - ori_shape
+
+ Args:
+ decode_backend (str): The data decoding backend type. Options are
+            'numpy' and 'nifti', and there is a convention that when the
+            backend is 'nifti' the axis order of the loaded data is XYZ,
+            and when the backend is 'numpy' it is ZYX. The data will be
+            transposed if the backend is 'nifti'. Defaults to 'nifti'.
+        to_xyz (bool): Whether to transpose data from Z, Y, X to X, Y, Z.
+ Defaults to False.
+ to_float32 (bool): Whether to convert the loaded image to a float32
+            numpy array. If set to False, the loaded image is a float64 array.
+ Defaults to True.
+ file_client_args (dict): Arguments to instantiate a FileClient.
+ See :class:`mmengine.fileio.FileClient` for details.
+ Defaults to ``dict(backend='disk')``.
+ """
+
+ def __init__(
+ self,
+ decode_backend: str = 'nifti',
+ to_xyz: bool = False,
+ to_float32: bool = True,
+ file_client_args: dict = dict(backend='disk')
+ ) -> None:
+ self.decode_backend = decode_backend
+ self.to_xyz = to_xyz
+ self.to_float32 = to_float32
+ self.file_client_args = file_client_args.copy()
+ self.file_client = mmengine.FileClient(**self.file_client_args)
+
+ def transform(self, results: Dict) -> Dict:
+ """Functions to load image.
+
+ Args:
+ results (dict): Result dict from :obj:``mmcv.BaseDataset``.
+
+ Returns:
+ dict: The dict contains loaded image and meta information.
+ """
+
+ filename = results['img_path']
+
+ data_bytes = self.file_client.get(filename)
+ img = datafrombytes(data_bytes, backend=self.decode_backend)
+
+ if self.to_float32:
+ img = img.astype(np.float32)
+
+ if len(img.shape) == 3:
+ img = img[None, ...]
+
+ if self.decode_backend == 'nifti':
+ img = img.transpose(0, 3, 2, 1)
+
+ if self.to_xyz:
+ img = img.transpose(0, 3, 2, 1)
+
+ results['img'] = img
+ results['img_shape'] = img.shape[1:]
+ results['ori_shape'] = img.shape[1:]
+ return results
+
+ def __repr__(self):
+ repr_str = (f'{self.__class__.__name__}('
+ f"decode_backend='{self.decode_backend}', "
+ f'to_xyz={self.to_xyz}, '
+ f'to_float32={self.to_float32}, '
+ f'file_client_args={self.file_client_args})')
+ return repr_str
+
+
+@TRANSFORMS.register_module()
+class LoadBiomedicalAnnotation(BaseTransform):
+ """Load ``seg_map`` annotation provided by biomedical dataset.
+
+ The annotation format is as the following:
+
+ .. code-block:: python
+
+ {
+ 'gt_seg_map': np.ndarray (X, Y, Z) or (Z, Y, X)
+ }
+
+ Required Keys:
+
+ - seg_map_path
+
+ Added Keys:
+
+    - gt_seg_map (np.ndarray): Biomedical seg map with shape (Z, Y, X) by
+        default. The data type is float32 if ``to_float32=True``, or float64
+        if decode_backend is 'nifti' and to_float32 is False.
+
+ Args:
+ decode_backend (str): The data decoding backend type. Options are
+            'numpy' and 'nifti', and there is a convention that when the
+            backend is 'nifti' the axis order of the loaded data is XYZ,
+            and when the backend is 'numpy' it is ZYX. The data will be
+            transposed if the backend is 'nifti'. Defaults to 'nifti'.
+        to_xyz (bool): Whether to transpose data from Z, Y, X to X, Y, Z.
+ Defaults to False.
+ to_float32 (bool): Whether to convert the loaded seg map to a float32
+            numpy array. If set to False, the loaded seg map is a float64 array.
+ Defaults to True.
+ file_client_args (dict): Arguments to instantiate a FileClient.
+ See :class:`mmengine.fileio.FileClient` for details.
+ Defaults to ``dict(backend='disk')``.
+ """
+
+ def __init__(
+ self,
+ decode_backend: str = 'nifti',
+ to_xyz: bool = False,
+ to_float32: bool = True,
+ file_client_args: dict = dict(backend='disk')
+ ) -> None:
+ super().__init__()
+ self.decode_backend = decode_backend
+ self.to_xyz = to_xyz
+ self.to_float32 = to_float32
+ self.file_client_args = file_client_args.copy()
+ self.file_client = mmengine.FileClient(**self.file_client_args)
+
+ def transform(self, results: Dict) -> Dict:
+ """Functions to load image.
+
+ Args:
+ results (dict): Result dict from :obj:``mmcv.BaseDataset``.
+
+ Returns:
+ dict: The dict contains loaded image and meta information.
+ """
+ data_bytes = self.file_client.get(results['seg_map_path'])
+ gt_seg_map = datafrombytes(data_bytes, backend=self.decode_backend)
+
+ if self.to_float32:
+ gt_seg_map = gt_seg_map.astype(np.float32)
+
+ if self.decode_backend == 'nifti':
+ gt_seg_map = gt_seg_map.transpose(2, 1, 0)
+
+ if self.to_xyz:
+ gt_seg_map = gt_seg_map.transpose(2, 1, 0)
+
+ results['gt_seg_map'] = gt_seg_map
+ return results
+
+ def __repr__(self):
+ repr_str = (f'{self.__class__.__name__}('
+ f"decode_backend='{self.decode_backend}', "
+ f'to_xyz={self.to_xyz}, '
+ f'to_float32={self.to_float32}, '
+ f'file_client_args={self.file_client_args})')
+ return repr_str
+
+
+@TRANSFORMS.register_module()
+class LoadBiomedicalData(BaseTransform):
+ """Load an biomedical image and annotation from file.
+
+ The loading data format is as the following:
+
+ .. code-block:: python
+
+ {
+ 'img': np.ndarray data[:-1, X, Y, Z]
+ 'seg_map': np.ndarray data[-1, X, Y, Z]
+ }
+
+
+ Required Keys:
+
+ - img_path
+
+ Added Keys:
+
+ - img (np.ndarray): Biomedical image with shape (N, Z, Y, X) by default,
+        where N is the number of modalities.
+ - gt_seg_map (np.ndarray, optional): Biomedical seg map with shape
+ (Z, Y, X) by default.
+ - img_shape
+ - ori_shape
+
+ Args:
+ with_seg (bool): Whether to parse and load the semantic segmentation
+ annotation. Defaults to False.
+ decode_backend (str): The data decoding backend type. Options are
+            'numpy' and 'nifti', and there is a convention that when the
+            backend is 'nifti' the axis order of the loaded data is XYZ,
+            and when the backend is 'numpy' it is ZYX. The data will be
+            transposed if the backend is 'nifti'. Defaults to 'nifti'.
+        to_xyz (bool): Whether to transpose data from Z, Y, X to X, Y, Z.
+ Defaults to False.
+ file_client_args (dict): Arguments to instantiate a FileClient.
+ See :class:`mmengine.fileio.FileClient` for details.
+ Defaults to ``dict(backend='disk')``.
+ """
+
+ def __init__(
+ self,
+ with_seg=False,
+ decode_backend: str = 'numpy',
+ to_xyz: bool = False,
+ file_client_args: dict = dict(backend='disk')
+ ) -> None:
+ self.with_seg = with_seg
+ self.decode_backend = decode_backend
+ self.to_xyz = to_xyz
+ self.file_client_args = file_client_args.copy()
+ self.file_client = mmengine.FileClient(**self.file_client_args)
+
+ def transform(self, results: Dict) -> Dict:
+ """Functions to load image.
+
+ Args:
+ results (dict): Result dict from :obj:``mmcv.BaseDataset``.
+
+ Returns:
+ dict: The dict contains loaded image and meta information.
+ """
+ data_bytes = self.file_client.get(results['img_path'])
+ data = datafrombytes(data_bytes, backend=self.decode_backend)
+        # img is 4D data (N, X, Y, Z), where N is the number of protocols
+ img = data[:-1, :]
+
+ if self.decode_backend == 'nifti':
+ img = img.transpose(0, 3, 2, 1)
+
+ if self.to_xyz:
+ img = img.transpose(0, 3, 2, 1)
+
+ results['img'] = img
+ results['img_shape'] = img.shape[1:]
+ results['ori_shape'] = img.shape[1:]
+
+ if self.with_seg:
+ gt_seg_map = data[-1, :]
+ if self.decode_backend == 'nifti':
+ gt_seg_map = gt_seg_map.transpose(2, 1, 0)
+
+ if self.to_xyz:
+ gt_seg_map = gt_seg_map.transpose(2, 1, 0)
+ results['gt_seg_map'] = gt_seg_map
+ return results
+
+ def __repr__(self) -> str:
+ repr_str = (f'{self.__class__.__name__}('
+ f'with_seg={self.with_seg}, '
+ f"decode_backend='{self.decode_backend}', "
+ f'to_xyz={self.to_xyz}, '
+ f'file_client_args={self.file_client_args})')
+ return repr_str
diff --git a/mmseg/datasets/transforms/transforms.py b/mmseg/datasets/transforms/transforms.py
index 8b753c599..46d3a66e0 100644
--- a/mmseg/datasets/transforms/transforms.py
+++ b/mmseg/datasets/transforms/transforms.py
@@ -1,7 +1,8 @@
# Copyright (c) OpenMMLab. All rights reserved.
import copy
-from typing import Sequence, Tuple, Union
+from typing import Dict, Sequence, Tuple, Union
+import cv2
import mmcv
import numpy as np
from mmcv.transforms.base import BaseTransform
@@ -59,8 +60,8 @@ class ResizeToMultiple(BaseTransform):
if self.interpolation else 'bilinear')
results['img'] = img
- results['img_shape'] = img.shape
- results['pad_shape'] = img.shape
+ results['img_shape'] = img.shape[:2]
+ results['pad_shape'] = img.shape[:2]
# Align segmentation map to multiple of size divisor.
for key in results.get('seg_fields', []):
@@ -1147,3 +1148,81 @@ class RandomMosaic(BaseTransform):
repr_str += f'pad_val={self.pad_val}, '
repr_str += f'seg_pad_val={self.pad_val})'
return repr_str
+
+
+@TRANSFORMS.register_module()
+class GenerateEdge(BaseTransform):
+ """Generate Edge for CE2P approach.
+
+ Edge will be used to calculate loss of
+    `CE2P <https://arxiv.org/abs/1809.05996>`_.
+
+ Modified from https://github.com/liutinglt/CE2P/blob/master/dataset/target_generation.py # noqa:E501
+
+ Required Keys:
+
+ - img_shape
+ - gt_seg_map
+
+    Added Keys:
+
+    - gt_edge (np.ndarray, uint8): The edge annotation generated from the
+ seg map by extracting border between different semantics.
+
+ Args:
+        edge_width (int): The width of the edge. Defaults to 3.
+        ignore_index (int): Index that will be ignored. Defaults to 255.
+ """
+
+ def __init__(self, edge_width: int = 3, ignore_index: int = 255) -> None:
+ super().__init__()
+ self.edge_width = edge_width
+ self.ignore_index = ignore_index
+
+ def transform(self, results: Dict) -> Dict:
+ """Call function to generate edge from segmentation map.
+
+ Args:
+ results (dict): Result dict.
+
+ Returns:
+ dict: Result dict with edge mask.
+ """
+ h, w = results['img_shape']
+ edge = np.zeros((h, w), dtype=np.uint8)
+ seg_map = results['gt_seg_map']
+
+ # down
+ edge_down = edge[1:h, :]
+ edge_down[(seg_map[1:h, :] != seg_map[:h - 1, :])
+ & (seg_map[1:h, :] != self.ignore_index) &
+ (seg_map[:h - 1, :] != self.ignore_index)] = 1
+ # left
+ edge_left = edge[:, :w - 1]
+ edge_left[(seg_map[:, :w - 1] != seg_map[:, 1:w])
+ & (seg_map[:, :w - 1] != self.ignore_index) &
+ (seg_map[:, 1:w] != self.ignore_index)] = 1
+ # up_left
+ edge_upleft = edge[:h - 1, :w - 1]
+ edge_upleft[(seg_map[:h - 1, :w - 1] != seg_map[1:h, 1:w])
+ & (seg_map[:h - 1, :w - 1] != self.ignore_index) &
+ (seg_map[1:h, 1:w] != self.ignore_index)] = 1
+ # up_right
+ edge_upright = edge[:h - 1, 1:w]
+ edge_upright[(seg_map[:h - 1, 1:w] != seg_map[1:h, :w - 1])
+ & (seg_map[:h - 1, 1:w] != self.ignore_index) &
+ (seg_map[1:h, :w - 1] != self.ignore_index)] = 1
+
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
+ (self.edge_width, self.edge_width))
+ edge = cv2.dilate(edge, kernel)
+
+ results['gt_edge'] = edge
+ results['edge_width'] = self.edge_width
+
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__
+        repr_str += f'(edge_width={self.edge_width}, '
+ repr_str += f'ignore_index={self.ignore_index})'
+ return repr_str
diff --git a/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py b/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py
index e614ad408..fdae3ca69 100644
--- a/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py
+++ b/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py
@@ -204,5 +204,4 @@ class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor):
warnings.warn('DeprecationWarning: Layer_decay_rate will '
'be deleted, please use decay_rate instead.')
paramwise_cfg['decay_rate'] = paramwise_cfg.pop('layer_decay_rate')
- super(LayerDecayOptimizerConstructor,
- self).__init__(optim_wrapper_cfg, paramwise_cfg)
+ super().__init__(optim_wrapper_cfg, paramwise_cfg)
diff --git a/mmseg/evaluation/metrics/citys_metric.py b/mmseg/evaluation/metrics/citys_metric.py
index af6e8b00d..50e9ea68a 100644
--- a/mmseg/evaluation/metrics/citys_metric.py
+++ b/mmseg/evaluation/metrics/citys_metric.py
@@ -76,9 +76,8 @@ class CitysMetric(BaseMetric):
output.putpalette(palette)
output.save(png_filename)
- ann_dir = osp.join(
- data_batch[0]['data_sample']['seg_map_path'].split('val')[0],
- 'val')
+ ann_dir = osp.join(data_samples[0]['seg_map_path'].split('val')[0],
+ 'val')
self.results.append(ann_dir)
def compute_metrics(self, results: list) -> Dict[str, float]:
@@ -86,9 +85,6 @@ class CitysMetric(BaseMetric):
Args:
results (list): Testing results of the dataset.
- logger (logging.Logger | str | None): Logger used for printing
- related information during evaluation. Default: None.
- imgfile_prefix (str | None): The prefix of output image file
Returns:
dict[str: float]: Cityscapes evaluation results.
diff --git a/mmseg/evaluation/metrics/iou_metric.py b/mmseg/evaluation/metrics/iou_metric.py
index a065fc218..a152ef9dd 100644
--- a/mmseg/evaluation/metrics/iou_metric.py
+++ b/mmseg/evaluation/metrics/iou_metric.py
@@ -51,7 +51,7 @@ class IoUMetric(BaseMetric):
"""Process one batch of data and data_samples.
The processed results should be stored in ``self.results``, which will
- be used to computed the metrics when all batches have been processed.
+ be used to compute the metrics when all batches have been processed.
Args:
data_batch (dict): A batch of data from the dataloader.
@@ -212,7 +212,7 @@ class IoUMetric(BaseMetric):
metrics = [metrics]
allowed_metrics = ['mIoU', 'mDice', 'mFscore']
if not set(metrics).issubset(set(allowed_metrics)):
- raise KeyError('metrics {} is not supported'.format(metrics))
+ raise KeyError(f'metrics {metrics} is not supported')
all_acc = total_area_intersect.sum() / total_area_label.sum()
ret_metrics = OrderedDict({'aAcc': all_acc})
diff --git a/mmseg/models/backbones/beit.py b/mmseg/models/backbones/beit.py
index 15d5fc797..4553f72d8 100644
--- a/mmseg/models/backbones/beit.py
+++ b/mmseg/models/backbones/beit.py
@@ -194,7 +194,7 @@ class BEiTTransformerEncoderLayer(VisionTransformerEncoderLayer):
init_values=None):
attn_cfg.update(dict(window_size=window_size, qk_scale=None))
- super(BEiTTransformerEncoderLayer, self).__init__(
+ super().__init__(
embed_dims=embed_dims,
num_heads=num_heads,
feedforward_channels=feedforward_channels,
@@ -214,9 +214,9 @@ class BEiTTransformerEncoderLayer(VisionTransformerEncoderLayer):
self.drop_path = build_dropout(
dropout_layer) if dropout_layer else nn.Identity()
self.gamma_1 = nn.Parameter(
- init_values * torch.ones((embed_dims)), requires_grad=True)
+ init_values * torch.ones(embed_dims), requires_grad=True)
self.gamma_2 = nn.Parameter(
- init_values * torch.ones((embed_dims)), requires_grad=True)
+ init_values * torch.ones(embed_dims), requires_grad=True)
def build_attn(self, attn_cfg):
self.attn = BEiTAttention(**attn_cfg)
@@ -287,7 +287,7 @@ class BEiT(BaseModule):
pretrained=None,
init_values=0.1,
init_cfg=None):
- super(BEiT, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
if isinstance(img_size, int):
img_size = to_2tuple(img_size)
elif isinstance(img_size, tuple):
@@ -505,7 +505,7 @@ class BEiT(BaseModule):
state_dict = self.resize_rel_pos_embed(checkpoint)
self.load_state_dict(state_dict, False)
elif self.init_cfg is not None:
- super(BEiT, self).init_weights()
+ super().init_weights()
else:
# We only implement the 'jax_impl' initialization implemented at
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501
@@ -551,7 +551,7 @@ class BEiT(BaseModule):
return tuple(outs)
def train(self, mode=True):
- super(BEiT, self).train(mode)
+ super().train(mode)
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, nn.LayerNorm):
diff --git a/mmseg/models/backbones/bisenetv1.py b/mmseg/models/backbones/bisenetv1.py
index fa9e51383..ca58bf9c5 100644
--- a/mmseg/models/backbones/bisenetv1.py
+++ b/mmseg/models/backbones/bisenetv1.py
@@ -29,7 +29,7 @@ class SpatialPath(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(SpatialPath, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert len(num_channels) == 4, 'Length of input channels \
of Spatial Path must be 4!'
@@ -98,7 +98,7 @@ class AttentionRefinementModule(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(AttentionRefinementModule, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.conv_layer = ConvModule(
in_channels=in_channels,
out_channels=out_channel,
@@ -152,7 +152,7 @@ class ContextPath(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(ContextPath, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert len(context_channels) == 3, 'Length of input channels \
of Context Path must be 3!'
@@ -228,7 +228,7 @@ class FeatureFusionModule(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(FeatureFusionModule, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.conv1 = ConvModule(
in_channels=in_channels,
out_channels=out_channels,
@@ -304,7 +304,7 @@ class BiSeNetV1(BaseModule):
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(BiSeNetV1, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert len(spatial_channels) == 4, 'Length of input channels \
of Spatial Path must be 4!'
diff --git a/mmseg/models/backbones/bisenetv2.py b/mmseg/models/backbones/bisenetv2.py
index 256de7952..32aa49822 100644
--- a/mmseg/models/backbones/bisenetv2.py
+++ b/mmseg/models/backbones/bisenetv2.py
@@ -37,7 +37,7 @@ class DetailBranch(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(DetailBranch, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
detail_branch = []
for i in range(len(detail_channels)):
if i == 0:
@@ -126,7 +126,7 @@ class StemBlock(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(StemBlock, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.conv_first = ConvModule(
in_channels=in_channels,
@@ -207,7 +207,7 @@ class GELayer(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(GELayer, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
mid_channel = in_channels * exp_ratio
self.conv1 = ConvModule(
in_channels=in_channels,
@@ -326,7 +326,7 @@ class CEBlock(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(CEBlock, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.in_channels = in_channels
self.out_channels = out_channels
self.gap = nn.Sequential(
@@ -385,7 +385,7 @@ class SemanticBranch(BaseModule):
in_channels=3,
exp_ratio=6,
init_cfg=None):
- super(SemanticBranch, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.in_channels = in_channels
self.semantic_channels = semantic_channels
self.semantic_stages = []
@@ -458,7 +458,7 @@ class BGALayer(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(BGALayer, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.out_channels = out_channels
self.align_corners = align_corners
self.detail_dwconv = nn.Sequential(
@@ -594,7 +594,7 @@ class BiSeNetV2(BaseModule):
dict(
type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm'])
]
- super(BiSeNetV2, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.in_channels = in_channels
self.out_indices = out_indices
self.detail_channels = detail_channels
diff --git a/mmseg/models/backbones/cgnet.py b/mmseg/models/backbones/cgnet.py
index 13e9f9146..b74b494f5 100644
--- a/mmseg/models/backbones/cgnet.py
+++ b/mmseg/models/backbones/cgnet.py
@@ -25,7 +25,7 @@ class GlobalContextExtractor(nn.Module):
"""
def __init__(self, channel, reduction=16, with_cp=False):
- super(GlobalContextExtractor, self).__init__()
+ super().__init__()
self.channel = channel
self.reduction = reduction
assert reduction >= 1 and channel >= reduction
@@ -87,7 +87,7 @@ class ContextGuidedBlock(nn.Module):
norm_cfg=dict(type='BN', requires_grad=True),
act_cfg=dict(type='PReLU'),
with_cp=False):
- super(ContextGuidedBlock, self).__init__()
+ super().__init__()
self.with_cp = with_cp
self.downsample = downsample
@@ -172,7 +172,7 @@ class InputInjection(nn.Module):
"""Downsampling module for CGNet."""
def __init__(self, num_downsampling):
- super(InputInjection, self).__init__()
+ super().__init__()
self.pool = nn.ModuleList()
for i in range(num_downsampling):
self.pool.append(nn.AvgPool2d(3, stride=2, padding=1))
@@ -230,7 +230,7 @@ class CGNet(BaseModule):
pretrained=None,
init_cfg=None):
- super(CGNet, self).__init__(init_cfg)
+ super().__init__(init_cfg)
assert not (init_cfg and pretrained), \
'init_cfg and pretrained cannot be setting at the same time'
@@ -364,7 +364,7 @@ class CGNet(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode will keeping the normalization
layer freezed."""
- super(CGNet, self).train(mode)
+ super().train(mode)
if mode and self.norm_eval:
for m in self.modules():
# trick: eval have effect on BatchNorm only
diff --git a/mmseg/models/backbones/erfnet.py b/mmseg/models/backbones/erfnet.py
index 88f41dfb1..2c5ec672a 100644
--- a/mmseg/models/backbones/erfnet.py
+++ b/mmseg/models/backbones/erfnet.py
@@ -35,7 +35,7 @@ class DownsamplerBlock(BaseModule):
norm_cfg=dict(type='BN', eps=1e-3),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(DownsamplerBlock, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
@@ -95,7 +95,7 @@ class NonBottleneck1d(BaseModule):
norm_cfg=dict(type='BN', eps=1e-3),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(NonBottleneck1d, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
@@ -168,7 +168,7 @@ class UpsamplerBlock(BaseModule):
norm_cfg=dict(type='BN', eps=1e-3),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(UpsamplerBlock, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
@@ -242,7 +242,7 @@ class ERFNet(BaseModule):
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(ERFNet, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert len(enc_downsample_channels) \
== len(dec_upsample_channels)+1, 'Number of downsample\
block of encoder does not \
diff --git a/mmseg/models/backbones/fast_scnn.py b/mmseg/models/backbones/fast_scnn.py
index 9884b2e18..6ff7a3191 100644
--- a/mmseg/models/backbones/fast_scnn.py
+++ b/mmseg/models/backbones/fast_scnn.py
@@ -36,7 +36,7 @@ class LearningToDownsample(nn.Module):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
dw_act_cfg=None):
- super(LearningToDownsample, self).__init__()
+ super().__init__()
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
@@ -124,7 +124,7 @@ class GlobalFeatureExtractor(nn.Module):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
align_corners=False):
- super(GlobalFeatureExtractor, self).__init__()
+ super().__init__()
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
@@ -220,7 +220,7 @@ class FeatureFusionModule(nn.Module):
dwconv_act_cfg=dict(type='ReLU'),
conv_act_cfg=None,
align_corners=False):
- super(FeatureFusionModule, self).__init__()
+ super().__init__()
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.dwconv_act_cfg = dwconv_act_cfg
@@ -340,7 +340,7 @@ class FastSCNN(BaseModule):
dw_act_cfg=None,
init_cfg=None):
- super(FastSCNN, self).__init__(init_cfg)
+ super().__init__(init_cfg)
if init_cfg is None:
self.init_cfg = [
diff --git a/mmseg/models/backbones/hrnet.py b/mmseg/models/backbones/hrnet.py
index ca4e15500..2da755e73 100644
--- a/mmseg/models/backbones/hrnet.py
+++ b/mmseg/models/backbones/hrnet.py
@@ -30,7 +30,7 @@ class HRModule(BaseModule):
norm_cfg=dict(type='BN', requires_grad=True),
block_init_cfg=None,
init_cfg=None):
- super(HRModule, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.block_init_cfg = block_init_cfg
self._check_branches(num_branches, num_blocks, in_channels,
num_channels)
@@ -308,7 +308,7 @@ class HRNet(BaseModule):
multiscale_output=True,
pretrained=None,
init_cfg=None):
- super(HRNet, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.pretrained = pretrained
self.zero_init_residual = zero_init_residual
@@ -633,7 +633,7 @@ class HRNet(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode will keeping the normalization
layer freezed."""
- super(HRNet, self).train(mode)
+ super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
diff --git a/mmseg/models/backbones/icnet.py b/mmseg/models/backbones/icnet.py
index faa6d15cc..8ff344856 100644
--- a/mmseg/models/backbones/icnet.py
+++ b/mmseg/models/backbones/icnet.py
@@ -64,7 +64,7 @@ class ICNet(BaseModule):
dict(type='Constant', val=1, layer='_BatchNorm'),
dict(type='Normal', mean=0.01, layer='Linear')
]
- super(ICNet, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.align_corners = align_corners
self.backbone = MODELS.build(backbone_cfg)
diff --git a/mmseg/models/backbones/mae.py b/mmseg/models/backbones/mae.py
index 1e7b932bf..a1f243f08 100644
--- a/mmseg/models/backbones/mae.py
+++ b/mmseg/models/backbones/mae.py
@@ -100,7 +100,7 @@ class MAE(BEiT):
pretrained=None,
init_values=0.1,
init_cfg=None):
- super(MAE, self).__init__(
+ super().__init__(
img_size=img_size,
patch_size=patch_size,
in_channels=in_channels,
@@ -186,7 +186,7 @@ class MAE(BEiT):
state_dict = self.resize_abs_pos_embed(state_dict)
self.load_state_dict(state_dict, False)
elif self.init_cfg is not None:
- super(MAE, self).init_weights()
+ super().init_weights()
else:
# We only implement the 'jax_impl' initialization implemented at
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501
diff --git a/mmseg/models/backbones/mit.py b/mmseg/models/backbones/mit.py
index faea1d0ef..66556bdfc 100644
--- a/mmseg/models/backbones/mit.py
+++ b/mmseg/models/backbones/mit.py
@@ -44,7 +44,7 @@ class MixFFN(BaseModule):
ffn_drop=0.,
dropout_layer=None,
init_cfg=None):
- super(MixFFN, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.embed_dims = embed_dims
self.feedforward_channels = feedforward_channels
@@ -253,7 +253,7 @@ class TransformerEncoderLayer(BaseModule):
batch_first=True,
sr_ratio=1,
with_cp=False):
- super(TransformerEncoderLayer, self).__init__()
+ super().__init__()
# The ret[0] of build_norm_layer is norm name.
self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]
@@ -357,7 +357,7 @@ class MixVisionTransformer(BaseModule):
pretrained=None,
init_cfg=None,
with_cp=False):
- super(MixVisionTransformer, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert not (init_cfg and pretrained), \
'init_cfg and pretrained cannot be set at the same time'
@@ -433,7 +433,7 @@ class MixVisionTransformer(BaseModule):
normal_init(
m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0)
else:
- super(MixVisionTransformer, self).init_weights()
+ super().init_weights()
def forward(self, x):
outs = []
diff --git a/mmseg/models/backbones/mobilenet_v2.py b/mmseg/models/backbones/mobilenet_v2.py
index 32224e08a..1c21b5df9 100644
--- a/mmseg/models/backbones/mobilenet_v2.py
+++ b/mmseg/models/backbones/mobilenet_v2.py
@@ -63,7 +63,7 @@ class MobileNetV2(BaseModule):
with_cp=False,
pretrained=None,
init_cfg=None):
- super(MobileNetV2, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.pretrained = pretrained
assert not (init_cfg and pretrained), \
@@ -189,7 +189,7 @@ class MobileNetV2(BaseModule):
param.requires_grad = False
def train(self, mode=True):
- super(MobileNetV2, self).train(mode)
+ super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
diff --git a/mmseg/models/backbones/mobilenet_v3.py b/mmseg/models/backbones/mobilenet_v3.py
index 93717df48..1efb6e097 100644
--- a/mmseg/models/backbones/mobilenet_v3.py
+++ b/mmseg/models/backbones/mobilenet_v3.py
@@ -81,7 +81,7 @@ class MobileNetV3(BaseModule):
with_cp=False,
pretrained=None,
init_cfg=None):
- super(MobileNetV3, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.pretrained = pretrained
assert not (init_cfg and pretrained), \
@@ -175,7 +175,7 @@ class MobileNetV3(BaseModule):
act_cfg=dict(type=act),
with_cp=self.with_cp)
in_channels = out_channels
- layer_name = 'layer{}'.format(i + 1)
+ layer_name = f'layer{i + 1}'
self.add_module(layer_name, layer)
layers.append(layer_name)
@@ -192,7 +192,7 @@ class MobileNetV3(BaseModule):
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=dict(type='HSwish'))
- layer_name = 'layer{}'.format(len(layer_setting) + 1)
+ layer_name = f'layer{len(layer_setting) + 1}'
self.add_module(layer_name, layer)
layers.append(layer_name)
@@ -259,7 +259,7 @@ class MobileNetV3(BaseModule):
param.requires_grad = False
def train(self, mode=True):
- super(MobileNetV3, self).train(mode)
+ super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
diff --git a/mmseg/models/backbones/resnest.py b/mmseg/models/backbones/resnest.py
index 519bd9738..3cc380b44 100644
--- a/mmseg/models/backbones/resnest.py
+++ b/mmseg/models/backbones/resnest.py
@@ -69,7 +69,7 @@ class SplitAttentionConv2d(nn.Module):
conv_cfg=None,
norm_cfg=dict(type='BN'),
dcn=None):
- super(SplitAttentionConv2d, self).__init__()
+ super().__init__()
inter_channels = max(in_channels * radix // reduction_factor, 32)
self.radix = radix
self.groups = groups
@@ -174,7 +174,7 @@ class Bottleneck(_Bottleneck):
avg_down_stride=True,
**kwargs):
"""Bottleneck block for ResNeSt."""
- super(Bottleneck, self).__init__(inplanes, planes, **kwargs)
+ super().__init__(inplanes, planes, **kwargs)
if groups == 1:
width = self.planes
@@ -304,7 +304,7 @@ class ResNeSt(ResNetV1d):
self.radix = radix
self.reduction_factor = reduction_factor
self.avg_down_stride = avg_down_stride
- super(ResNeSt, self).__init__(**kwargs)
+ super().__init__(**kwargs)
def make_res_layer(self, **kwargs):
"""Pack all blocks in a stage into a ``ResLayer``."""
diff --git a/mmseg/models/backbones/resnet.py b/mmseg/models/backbones/resnet.py
index 51ac7faab..9226c90d8 100644
--- a/mmseg/models/backbones/resnet.py
+++ b/mmseg/models/backbones/resnet.py
@@ -29,7 +29,7 @@ class BasicBlock(BaseModule):
dcn=None,
plugins=None,
init_cfg=None):
- super(BasicBlock, self).__init__(init_cfg)
+ super().__init__(init_cfg)
assert dcn is None, 'Not implemented yet.'
assert plugins is None, 'Not implemented yet.'
@@ -118,7 +118,7 @@ class Bottleneck(BaseModule):
dcn=None,
plugins=None,
init_cfg=None):
- super(Bottleneck, self).__init__(init_cfg)
+ super().__init__(init_cfg)
assert style in ['pytorch', 'caffe']
assert dcn is None or isinstance(dcn, dict)
assert plugins is None or isinstance(plugins, list)
@@ -418,7 +418,7 @@ class ResNet(BaseModule):
zero_init_residual=True,
pretrained=None,
init_cfg=None):
- super(ResNet, self).__init__(init_cfg)
+ super().__init__(init_cfg)
if depth not in self.arch_settings:
raise KeyError(f'invalid depth {depth} for resnet')
@@ -676,7 +676,7 @@ class ResNet(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode while keep normalization layer
freezed."""
- super(ResNet, self).train(mode)
+ super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
@@ -696,8 +696,7 @@ class ResNetV1c(ResNet):
"""
def __init__(self, **kwargs):
- super(ResNetV1c, self).__init__(
- deep_stem=True, avg_down=False, **kwargs)
+ super().__init__(deep_stem=True, avg_down=False, **kwargs)
@MODELS.register_module()
@@ -710,5 +709,4 @@ class ResNetV1d(ResNet):
"""
def __init__(self, **kwargs):
- super(ResNetV1d, self).__init__(
- deep_stem=True, avg_down=True, **kwargs)
+ super().__init__(deep_stem=True, avg_down=True, **kwargs)
diff --git a/mmseg/models/backbones/resnext.py b/mmseg/models/backbones/resnext.py
index 2f7cacab7..67a244a12 100644
--- a/mmseg/models/backbones/resnext.py
+++ b/mmseg/models/backbones/resnext.py
@@ -23,7 +23,7 @@ class Bottleneck(_Bottleneck):
base_width=4,
base_channels=64,
**kwargs):
- super(Bottleneck, self).__init__(inplanes, planes, **kwargs)
+ super().__init__(inplanes, planes, **kwargs)
if groups == 1:
width = self.planes
@@ -139,7 +139,7 @@ class ResNeXt(ResNet):
def __init__(self, groups=1, base_width=4, **kwargs):
self.groups = groups
self.base_width = base_width
- super(ResNeXt, self).__init__(**kwargs)
+ super().__init__(**kwargs)
def make_res_layer(self, **kwargs):
"""Pack all blocks in a stage into a ``ResLayer``"""
diff --git a/mmseg/models/backbones/stdc.py b/mmseg/models/backbones/stdc.py
index 340d6ee4c..758a3c92e 100644
--- a/mmseg/models/backbones/stdc.py
+++ b/mmseg/models/backbones/stdc.py
@@ -35,7 +35,7 @@ class STDCModule(BaseModule):
num_convs=4,
fusion_type='add',
init_cfg=None):
- super(STDCModule, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert num_convs > 1
assert fusion_type in ['add', 'cat']
self.stride = stride
@@ -155,7 +155,7 @@ class FeatureFusionModule(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(FeatureFusionModule, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
channels = out_channels // scale_factor
self.conv0 = ConvModule(
in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg)
@@ -240,7 +240,7 @@ class STDCNet(BaseModule):
with_final_conv=False,
pretrained=None,
init_cfg=None):
- super(STDCNet, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert stdc_type in self.arch_settings, \
f'invalid structure {stdc_type} for STDCNet.'
assert bottleneck_type in ['add', 'cat'],\
@@ -370,7 +370,7 @@ class STDCContextPathNet(BaseModule):
align_corners=None,
norm_cfg=dict(type='BN'),
init_cfg=None):
- super(STDCContextPathNet, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.backbone = MODELS.build(backbone_cfg)
self.arms = ModuleList()
self.convs = ModuleList()
diff --git a/mmseg/models/backbones/swin.py b/mmseg/models/backbones/swin.py
index 884548d2f..57ab99085 100644
--- a/mmseg/models/backbones/swin.py
+++ b/mmseg/models/backbones/swin.py
@@ -326,7 +326,7 @@ class SwinBlock(BaseModule):
with_cp=False,
init_cfg=None):
- super(SwinBlock, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.with_cp = with_cp
@@ -561,7 +561,7 @@ class SwinTransformer(BaseModule):
else:
raise TypeError('pretrained must be a str or None')
- super(SwinTransformer, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
num_layers = len(depths)
self.out_indices = out_indices
@@ -636,7 +636,7 @@ class SwinTransformer(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode while keep layers freezed."""
- super(SwinTransformer, self).train(mode)
+ super().train(mode)
self._freeze_stages()
def _freeze_stages(self):
diff --git a/mmseg/models/backbones/timm_backbone.py b/mmseg/models/backbones/timm_backbone.py
index 3ecb1c4a9..1eef302bd 100644
--- a/mmseg/models/backbones/timm_backbone.py
+++ b/mmseg/models/backbones/timm_backbone.py
@@ -37,7 +37,7 @@ class TIMMBackbone(BaseModule):
):
if timm is None:
raise RuntimeError('timm is not installed')
- super(TIMMBackbone, self).__init__(init_cfg)
+ super().__init__(init_cfg)
if 'norm_layer' in kwargs:
kwargs['norm_layer'] = MMENGINE_MODELS.get(kwargs['norm_layer'])
self.timm_model = timm.create_model(
diff --git a/mmseg/models/backbones/twins.py b/mmseg/models/backbones/twins.py
index 9ee11f0ab..b6a6eea79 100644
--- a/mmseg/models/backbones/twins.py
+++ b/mmseg/models/backbones/twins.py
@@ -62,7 +62,7 @@ class GlobalSubsampledAttention(EfficientMultiheadAttention):
norm_cfg=dict(type='LN'),
sr_ratio=1,
init_cfg=None):
- super(GlobalSubsampledAttention, self).__init__(
+ super().__init__(
embed_dims,
num_heads,
attn_drop=attn_drop,
@@ -112,7 +112,7 @@ class GSAEncoderLayer(BaseModule):
norm_cfg=dict(type='LN'),
sr_ratio=1.,
init_cfg=None):
- super(GSAEncoderLayer, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1]
self.attn = GlobalSubsampledAttention(
@@ -172,7 +172,7 @@ class LocallyGroupedSelfAttention(BaseModule):
proj_drop_rate=0.,
window_size=1,
init_cfg=None):
- super(LocallyGroupedSelfAttention, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert embed_dims % num_heads == 0, f'dim {embed_dims} should be ' \
f'divided by num_heads ' \
@@ -284,7 +284,7 @@ class LSAEncoderLayer(BaseModule):
window_size=1,
init_cfg=None):
- super(LSAEncoderLayer, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1]
self.attn = LocallyGroupedSelfAttention(embed_dims, num_heads,
@@ -325,7 +325,7 @@ class ConditionalPositionEncoding(BaseModule):
"""
def __init__(self, in_channels, embed_dims=768, stride=1, init_cfg=None):
- super(ConditionalPositionEncoding, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.proj = nn.Conv2d(
in_channels,
embed_dims,
@@ -401,7 +401,7 @@ class PCPVT(BaseModule):
norm_after_stage=False,
pretrained=None,
init_cfg=None):
- super(PCPVT, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert not (init_cfg and pretrained), \
'init_cfg and pretrained cannot be set at the same time'
if isinstance(pretrained, str):
@@ -471,7 +471,7 @@ class PCPVT(BaseModule):
def init_weights(self):
if self.init_cfg is not None:
- super(PCPVT, self).init_weights()
+ super().init_weights()
else:
for m in self.modules():
if isinstance(m, nn.Linear):
@@ -563,11 +563,11 @@ class SVT(PCPVT):
norm_after_stage=True,
pretrained=None,
init_cfg=None):
- super(SVT, self).__init__(in_channels, embed_dims, patch_sizes,
- strides, num_heads, mlp_ratios, out_indices,
- qkv_bias, drop_rate, attn_drop_rate,
- drop_path_rate, norm_cfg, depths, sr_ratios,
- norm_after_stage, pretrained, init_cfg)
+ super().__init__(in_channels, embed_dims, patch_sizes, strides,
+ num_heads, mlp_ratios, out_indices, qkv_bias,
+ drop_rate, attn_drop_rate, drop_path_rate, norm_cfg,
+ depths, sr_ratios, norm_after_stage, pretrained,
+ init_cfg)
# transformer encoder
dpr = [
x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
diff --git a/mmseg/models/backbones/unet.py b/mmseg/models/backbones/unet.py
index f25658ad9..545921db8 100644
--- a/mmseg/models/backbones/unet.py
+++ b/mmseg/models/backbones/unet.py
@@ -53,7 +53,7 @@ class BasicConvBlock(nn.Module):
act_cfg=dict(type='ReLU'),
dcn=None,
plugins=None):
- super(BasicConvBlock, self).__init__()
+ super().__init__()
assert dcn is None, 'Not implemented yet.'
assert plugins is None, 'Not implemented yet.'
@@ -112,7 +112,7 @@ class DeconvModule(nn.Module):
*,
kernel_size=4,
scale_factor=2):
- super(DeconvModule, self).__init__()
+ super().__init__()
assert (kernel_size - scale_factor >= 0) and\
(kernel_size - scale_factor) % 2 == 0,\
@@ -191,7 +191,7 @@ class InterpConv(nn.Module):
padding=0,
upsample_cfg=dict(
scale_factor=2, mode='bilinear', align_corners=False)):
- super(InterpConv, self).__init__()
+ super().__init__()
self.with_cp = with_cp
conv = ConvModule(
@@ -298,7 +298,7 @@ class UNet(BaseModule):
plugins=None,
pretrained=None,
init_cfg=None):
- super(UNet, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.pretrained = pretrained
assert not (init_cfg and pretrained), \
@@ -396,7 +396,7 @@ class UNet(BaseModule):
act_cfg=act_cfg,
dcn=None,
plugins=None))
- self.encoder.append((nn.Sequential(*enc_conv_block)))
+ self.encoder.append(nn.Sequential(*enc_conv_block))
in_channels = base_channels * 2**i
def forward(self, x):
@@ -415,7 +415,7 @@ class UNet(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode while keep normalization layer
freezed."""
- super(UNet, self).train(mode)
+ super().train(mode)
if mode and self.norm_eval:
for m in self.modules():
# trick: eval have effect on BatchNorm only
diff --git a/mmseg/models/backbones/vit.py b/mmseg/models/backbones/vit.py
index e9e87dc3b..3c96f6549 100644
--- a/mmseg/models/backbones/vit.py
+++ b/mmseg/models/backbones/vit.py
@@ -60,7 +60,7 @@ class TransformerEncoderLayer(BaseModule):
attn_cfg=dict(),
ffn_cfg=dict(),
with_cp=False):
- super(TransformerEncoderLayer, self).__init__()
+ super().__init__()
self.norm1_name, norm1 = build_norm_layer(
norm_cfg, embed_dims, postfix=1)
@@ -197,7 +197,7 @@ class VisionTransformer(BaseModule):
with_cp=False,
pretrained=None,
init_cfg=None):
- super(VisionTransformer, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
if isinstance(img_size, int):
img_size = to_2tuple(img_size)
@@ -315,7 +315,7 @@ class VisionTransformer(BaseModule):
load_state_dict(self, state_dict, strict=False, logger=None)
elif self.init_cfg is not None:
- super(VisionTransformer, self).init_weights()
+ super().init_weights()
else:
# We only implement the 'jax_impl' initialization implemented at
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501
@@ -335,7 +335,7 @@ class VisionTransformer(BaseModule):
constant_init(m, val=1.0, bias=0.)
def _pos_embeding(self, patched_img, hw_shape, pos_embed):
- """Positiong embeding method.
+ """Positioning embeding method.
Resize the pos_embed, if the input image size doesn't match
the training size.
@@ -431,7 +431,7 @@ class VisionTransformer(BaseModule):
return tuple(outs)
def train(self, mode=True):
- super(VisionTransformer, self).train(mode)
+ super().train(mode)
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, nn.LayerNorm):
diff --git a/mmseg/models/decode_heads/ann_head.py b/mmseg/models/decode_heads/ann_head.py
index 9cc791b26..2b40ef5aa 100644
--- a/mmseg/models/decode_heads/ann_head.py
+++ b/mmseg/models/decode_heads/ann_head.py
@@ -17,7 +17,7 @@ class PPMConcat(nn.ModuleList):
"""
def __init__(self, pool_scales=(1, 3, 6, 8)):
- super(PPMConcat, self).__init__(
+ super().__init__(
[nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales])
def forward(self, feats):
@@ -58,7 +58,7 @@ class SelfAttentionBlock(_SelfAttentionBlock):
query_downsample = nn.MaxPool2d(kernel_size=query_scale)
else:
query_downsample = None
- super(SelfAttentionBlock, self).__init__(
+ super().__init__(
key_in_channels=low_in_channels,
query_in_channels=high_in_channels,
channels=channels,
@@ -100,7 +100,7 @@ class AFNB(nn.Module):
def __init__(self, low_in_channels, high_in_channels, channels,
out_channels, query_scales, key_pool_scales, conv_cfg,
norm_cfg, act_cfg):
- super(AFNB, self).__init__()
+ super().__init__()
self.stages = nn.ModuleList()
for query_scale in query_scales:
self.stages.append(
@@ -150,7 +150,7 @@ class APNB(nn.Module):
def __init__(self, in_channels, channels, out_channels, query_scales,
key_pool_scales, conv_cfg, norm_cfg, act_cfg):
- super(APNB, self).__init__()
+ super().__init__()
self.stages = nn.ModuleList()
for query_scale in query_scales:
self.stages.append(
@@ -201,8 +201,7 @@ class ANNHead(BaseDecodeHead):
query_scales=(1, ),
key_pool_scales=(1, 3, 6, 8),
**kwargs):
- super(ANNHead, self).__init__(
- input_transform='multiple_select', **kwargs)
+ super().__init__(input_transform='multiple_select', **kwargs)
assert len(self.in_channels) == 2
low_in_channels, high_in_channels = self.in_channels
self.project_channels = project_channels
diff --git a/mmseg/models/decode_heads/apc_head.py b/mmseg/models/decode_heads/apc_head.py
index 187fdb0e9..728f39659 100644
--- a/mmseg/models/decode_heads/apc_head.py
+++ b/mmseg/models/decode_heads/apc_head.py
@@ -25,7 +25,7 @@ class ACM(nn.Module):
def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg,
norm_cfg, act_cfg):
- super(ACM, self).__init__()
+ super().__init__()
self.pool_scale = pool_scale
self.fusion = fusion
self.in_channels = in_channels
@@ -123,7 +123,7 @@ class APCHead(BaseDecodeHead):
"""
def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs):
- super(APCHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
assert isinstance(pool_scales, (list, tuple))
self.pool_scales = pool_scales
self.fusion = fusion
diff --git a/mmseg/models/decode_heads/aspp_head.py b/mmseg/models/decode_heads/aspp_head.py
index 757e69359..6d7185d7d 100644
--- a/mmseg/models/decode_heads/aspp_head.py
+++ b/mmseg/models/decode_heads/aspp_head.py
@@ -22,7 +22,7 @@ class ASPPModule(nn.ModuleList):
def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg,
act_cfg):
- super(ASPPModule, self).__init__()
+ super().__init__()
self.dilations = dilations
self.in_channels = in_channels
self.channels = channels
@@ -63,7 +63,7 @@ class ASPPHead(BaseDecodeHead):
"""
def __init__(self, dilations=(1, 6, 12, 18), **kwargs):
- super(ASPPHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
assert isinstance(dilations, (list, tuple))
self.dilations = dilations
self.image_pool = nn.Sequential(
diff --git a/mmseg/models/decode_heads/cascade_decode_head.py b/mmseg/models/decode_heads/cascade_decode_head.py
index 82d6c3af4..fe2bcb930 100644
--- a/mmseg/models/decode_heads/cascade_decode_head.py
+++ b/mmseg/models/decode_heads/cascade_decode_head.py
@@ -13,7 +13,7 @@ class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta):
:class:`CascadeEncoderDecoder`."""
def __init__(self, *args, **kwargs):
- super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
@abstractmethod
def forward(self, inputs, prev_output):
diff --git a/mmseg/models/decode_heads/cc_head.py b/mmseg/models/decode_heads/cc_head.py
index 03ad3db76..e9075a264 100644
--- a/mmseg/models/decode_heads/cc_head.py
+++ b/mmseg/models/decode_heads/cc_head.py
@@ -26,7 +26,7 @@ class CCHead(FCNHead):
if CrissCrossAttention is None:
raise RuntimeError('Please install mmcv-full for '
'CrissCrossAttention ops')
- super(CCHead, self).__init__(num_convs=2, **kwargs)
+ super().__init__(num_convs=2, **kwargs)
self.recurrence = recurrence
self.cca = CrissCrossAttention(self.channels)
diff --git a/mmseg/models/decode_heads/da_head.py b/mmseg/models/decode_heads/da_head.py
index 6a58e256a..d87214365 100644
--- a/mmseg/models/decode_heads/da_head.py
+++ b/mmseg/models/decode_heads/da_head.py
@@ -21,7 +21,7 @@ class PAM(_SelfAttentionBlock):
"""
def __init__(self, in_channels, channels):
- super(PAM, self).__init__(
+ super().__init__(
key_in_channels=in_channels,
query_in_channels=in_channels,
channels=channels,
@@ -43,7 +43,7 @@ class PAM(_SelfAttentionBlock):
def forward(self, x):
"""Forward function."""
- out = super(PAM, self).forward(x, x)
+ out = super().forward(x, x)
out = self.gamma(out) + x
return out
@@ -53,7 +53,7 @@ class CAM(nn.Module):
"""Channel Attention Module (CAM)"""
def __init__(self):
- super(CAM, self).__init__()
+ super().__init__()
self.gamma = Scale(0)
def forward(self, x):
@@ -86,7 +86,7 @@ class DAHead(BaseDecodeHead):
"""
def __init__(self, pam_channels, **kwargs):
- super(DAHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.pam_channels = pam_channels
self.pam_in_conv = ConvModule(
self.in_channels,
@@ -173,15 +173,12 @@ class DAHead(BaseDecodeHead):
loss = dict()
loss.update(
add_prefix(
- super(DAHead, self).loss_by_feat(pam_cam_seg_logit,
- batch_data_samples),
+ super().loss_by_feat(pam_cam_seg_logit, batch_data_samples),
'pam_cam'))
loss.update(
- add_prefix(
- super(DAHead, self).loss_by_feat(pam_seg_logit,
- batch_data_samples), 'pam'))
+ add_prefix(super().loss_by_feat(pam_seg_logit, batch_data_samples),
+ 'pam'))
loss.update(
- add_prefix(
- super(DAHead, self).loss_by_feat(cam_seg_logit,
- batch_data_samples), 'cam'))
+ add_prefix(super().loss_by_feat(cam_seg_logit, batch_data_samples),
+ 'cam'))
return loss
diff --git a/mmseg/models/decode_heads/decode_head.py b/mmseg/models/decode_heads/decode_head.py
index 6fdfbba1d..c7223f944 100644
--- a/mmseg/models/decode_heads/decode_head.py
+++ b/mmseg/models/decode_heads/decode_head.py
@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
from abc import ABCMeta, abstractmethod
from typing import List, Tuple
@@ -44,6 +45,9 @@ class BaseDecodeHead(BaseModule, metaclass=ABCMeta):
in_channels (int|Sequence[int]): Input channels.
channels (int): Channels after modules, before conv_seg.
num_classes (int): Number of classes.
+ out_channels (int): Output channels of conv_seg. Default: None.
+ threshold (float): Threshold for binary segmentation in the case of
+ `out_channels==1`. Default: None.
dropout_ratio (float): Ratio of dropout layer. Default: 0.1.
conv_cfg (dict|None): Config of conv layers. Default: None.
norm_cfg (dict|None): Config of norm layers. Default: None.
@@ -82,6 +86,8 @@ class BaseDecodeHead(BaseModule, metaclass=ABCMeta):
channels,
*,
num_classes,
+ out_channels=None,
+ threshold=None,
dropout_ratio=0.1,
conv_cfg=None,
norm_cfg=None,
@@ -97,10 +103,9 @@ class BaseDecodeHead(BaseModule, metaclass=ABCMeta):
align_corners=False,
init_cfg=dict(
type='Normal', std=0.01, override=dict(name='conv_seg'))):
- super(BaseDecodeHead, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self._init_inputs(in_channels, in_index, input_transform)
self.channels = channels
- self.num_classes = num_classes
self.dropout_ratio = dropout_ratio
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
@@ -110,6 +115,30 @@ class BaseDecodeHead(BaseModule, metaclass=ABCMeta):
self.ignore_index = ignore_index
self.align_corners = align_corners
+ if out_channels is None:
+ if num_classes == 2:
+ warnings.warn('For binary segmentation, we suggest using '
+ '`out_channels = 1` to define the output '
+ 'channels of the segmentor, and use `threshold` '
+ 'to convert `seg_logits` into a prediction '
+ 'by applying a threshold')
+ out_channels = num_classes
+
+ if out_channels != num_classes and out_channels != 1:
+ raise ValueError(
+ 'out_channels should be equal to num_classes, except for '
+ 'binary segmentation, where out_channels == 1 and '
+ f'num_classes == 2, but got out_channels={out_channels} '
+ f'and num_classes={num_classes}')
+
+ if out_channels == 1 and threshold is None:
+ threshold = 0.3
+ warnings.warn('threshold is not defined for binary '
+ 'segmentation and defaults to 0.3')
+ self.num_classes = num_classes
+ self.out_channels = out_channels
+ self.threshold = threshold
+
if isinstance(loss_decode, dict):
self.loss_decode = build_loss(loss_decode)
elif isinstance(loss_decode, (list, tuple)):
@@ -125,7 +154,7 @@ class BaseDecodeHead(BaseModule, metaclass=ABCMeta):
else:
self.sampler = None
- self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1)
+ self.conv_seg = nn.Conv2d(channels, self.out_channels, kernel_size=1)
if dropout_ratio > 0:
self.dropout = nn.Dropout2d(dropout_ratio)
else:
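The new `out_channels`/`threshold` contract is easiest to see in a head config. A hedged sketch of a binary-segmentation head, assuming an FCNHead; the channel sizes are illustrative and only `num_classes`, `out_channels` and `threshold` matter here:

# Hypothetical config: binary segmentation with a single logit channel.
decode_head = dict(
    type='FCNHead',
    in_channels=64,
    channels=64,
    num_classes=2,    # binary task
    out_channels=1,   # one sigmoid logit instead of a 2-way softmax
    threshold=0.3,    # pred = (seg_logit > threshold)
    loss_decode=dict(
        type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))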
diff --git a/mmseg/models/decode_heads/dm_head.py b/mmseg/models/decode_heads/dm_head.py
index 30405e3eb..7694abd8a 100644
--- a/mmseg/models/decode_heads/dm_head.py
+++ b/mmseg/models/decode_heads/dm_head.py
@@ -24,7 +24,7 @@ class DCM(nn.Module):
def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg,
norm_cfg, act_cfg):
- super(DCM, self).__init__()
+ super().__init__()
self.filter_size = filter_size
self.fusion = fusion
self.in_channels = in_channels
@@ -105,7 +105,7 @@ class DMHead(BaseDecodeHead):
"""
def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs):
- super(DMHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
assert isinstance(filter_sizes, (list, tuple))
self.filter_sizes = filter_sizes
self.fusion = fusion
diff --git a/mmseg/models/decode_heads/dnl_head.py b/mmseg/models/decode_heads/dnl_head.py
index 400a17556..248c11814 100644
--- a/mmseg/models/decode_heads/dnl_head.py
+++ b/mmseg/models/decode_heads/dnl_head.py
@@ -111,7 +111,7 @@ class DNLHead(FCNHead):
mode='embedded_gaussian',
temperature=0.05,
**kwargs):
- super(DNLHead, self).__init__(num_convs=2, **kwargs)
+ super().__init__(num_convs=2, **kwargs)
self.reduction = reduction
self.use_scale = use_scale
self.mode = mode
diff --git a/mmseg/models/decode_heads/dpt_head.py b/mmseg/models/decode_heads/dpt_head.py
index 989b53aa4..d2cfd89da 100644
--- a/mmseg/models/decode_heads/dpt_head.py
+++ b/mmseg/models/decode_heads/dpt_head.py
@@ -30,7 +30,7 @@ class ReassembleBlocks(BaseModule):
readout_type='ignore',
patch_size=16,
init_cfg=None):
- super(ReassembleBlocks, self).__init__(init_cfg)
+ super().__init__(init_cfg)
assert readout_type in ['ignore', 'add', 'project']
self.readout_type = readout_type
@@ -116,7 +116,7 @@ class PreActResidualConvUnit(BaseModule):
stride=1,
dilation=1,
init_cfg=None):
- super(PreActResidualConvUnit, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.conv1 = ConvModule(
in_channels,
@@ -168,7 +168,7 @@ class FeatureFusionBlock(BaseModule):
expand=False,
align_corners=True,
init_cfg=None):
- super(FeatureFusionBlock, self).__init__(init_cfg)
+ super().__init__(init_cfg)
self.in_channels = in_channels
self.expand = expand
@@ -242,7 +242,7 @@ class DPTHead(BaseDecodeHead):
act_cfg=dict(type='ReLU'),
norm_cfg=dict(type='BN'),
**kwargs):
- super(DPTHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.in_channels = self.in_channels
self.expand_channels = expand_channels
diff --git a/mmseg/models/decode_heads/ema_head.py b/mmseg/models/decode_heads/ema_head.py
index d7923f424..ab8dbb0c2 100644
--- a/mmseg/models/decode_heads/ema_head.py
+++ b/mmseg/models/decode_heads/ema_head.py
@@ -30,7 +30,7 @@ class EMAModule(nn.Module):
"""
def __init__(self, channels, num_bases, num_stages, momentum):
- super(EMAModule, self).__init__()
+ super().__init__()
assert num_stages >= 1, 'num_stages must be at least 1!'
self.num_bases = num_bases
self.num_stages = num_stages
@@ -99,7 +99,7 @@ class EMAHead(BaseDecodeHead):
concat_input=True,
momentum=0.1,
**kwargs):
- super(EMAHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.ema_channels = ema_channels
self.num_bases = num_bases
self.num_stages = num_stages
diff --git a/mmseg/models/decode_heads/enc_head.py b/mmseg/models/decode_heads/enc_head.py
index cfceb9428..ef48fb699 100644
--- a/mmseg/models/decode_heads/enc_head.py
+++ b/mmseg/models/decode_heads/enc_head.py
@@ -26,7 +26,7 @@ class EncModule(nn.Module):
"""
def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg):
- super(EncModule, self).__init__()
+ super().__init__()
self.encoding_project = ConvModule(
in_channels,
in_channels,
@@ -90,8 +90,7 @@ class EncHead(BaseDecodeHead):
use_sigmoid=True,
loss_weight=0.2),
**kwargs):
- super(EncHead, self).__init__(
- input_transform='multiple_select', **kwargs)
+ super().__init__(input_transform='multiple_select', **kwargs)
self.use_se_loss = use_se_loss
self.add_lateral = add_lateral
self.num_codes = num_codes
@@ -188,8 +187,7 @@ class EncHead(BaseDecodeHead):
"""Compute segmentation and semantic encoding loss."""
seg_logit, se_seg_logit = seg_logit
loss = dict()
- loss.update(
- super(EncHead, self).loss_by_feat(seg_logit, batch_data_samples))
+ loss.update(super().loss_by_feat(seg_logit, batch_data_samples))
seg_label = self._stack_batch_gt(batch_data_samples)
se_loss = self.loss_se_decode(
diff --git a/mmseg/models/decode_heads/fcn_head.py b/mmseg/models/decode_heads/fcn_head.py
index 4e3b974a8..341801888 100644
--- a/mmseg/models/decode_heads/fcn_head.py
+++ b/mmseg/models/decode_heads/fcn_head.py
@@ -31,7 +31,7 @@ class FCNHead(BaseDecodeHead):
self.num_convs = num_convs
self.concat_input = concat_input
self.kernel_size = kernel_size
- super(FCNHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
if num_convs == 0:
assert self.in_channels == self.channels
diff --git a/mmseg/models/decode_heads/fpn_head.py b/mmseg/models/decode_heads/fpn_head.py
index a9af1feec..25f481fe8 100644
--- a/mmseg/models/decode_heads/fpn_head.py
+++ b/mmseg/models/decode_heads/fpn_head.py
@@ -22,8 +22,7 @@ class FPNHead(BaseDecodeHead):
"""
def __init__(self, feature_strides, **kwargs):
- super(FPNHead, self).__init__(
- input_transform='multiple_select', **kwargs)
+ super().__init__(input_transform='multiple_select', **kwargs)
assert len(feature_strides) == len(self.in_channels)
assert min(feature_strides) == feature_strides[0]
self.feature_strides = feature_strides
diff --git a/mmseg/models/decode_heads/gc_head.py b/mmseg/models/decode_heads/gc_head.py
index e89b92d8b..14f0ef021 100644
--- a/mmseg/models/decode_heads/gc_head.py
+++ b/mmseg/models/decode_heads/gc_head.py
@@ -26,7 +26,7 @@ class GCHead(FCNHead):
pooling_type='att',
fusion_types=('channel_add', ),
**kwargs):
- super(GCHead, self).__init__(num_convs=2, **kwargs)
+ super().__init__(num_convs=2, **kwargs)
self.ratio = ratio
self.pooling_type = pooling_type
self.fusion_types = fusion_types
diff --git a/mmseg/models/decode_heads/isa_head.py b/mmseg/models/decode_heads/isa_head.py
index 3769bdff4..355f215f3 100644
--- a/mmseg/models/decode_heads/isa_head.py
+++ b/mmseg/models/decode_heads/isa_head.py
@@ -22,7 +22,7 @@ class SelfAttentionBlock(_SelfAttentionBlock):
"""
def __init__(self, in_channels, channels, conv_cfg, norm_cfg, act_cfg):
- super(SelfAttentionBlock, self).__init__(
+ super().__init__(
key_in_channels=in_channels,
query_in_channels=in_channels,
channels=channels,
@@ -51,7 +51,7 @@ class SelfAttentionBlock(_SelfAttentionBlock):
def forward(self, x):
"""Forward function."""
- context = super(SelfAttentionBlock, self).forward(x, x)
+ context = super().forward(x, x)
return self.output_project(context)
@@ -68,7 +68,7 @@ class ISAHead(BaseDecodeHead):
"""
def __init__(self, isa_channels, down_factor=(8, 8), **kwargs):
- super(ISAHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.down_factor = down_factor
self.in_conv = ConvModule(
diff --git a/mmseg/models/decode_heads/knet_head.py b/mmseg/models/decode_heads/knet_head.py
index 1b19b4a0f..181ae0216 100644
--- a/mmseg/models/decode_heads/knet_head.py
+++ b/mmseg/models/decode_heads/knet_head.py
@@ -48,7 +48,7 @@ class KernelUpdator(nn.Module):
norm_cfg=dict(type='LN'),
act_cfg=dict(type='ReLU', inplace=True),
):
- super(KernelUpdator, self).__init__()
+ super().__init__()
self.in_channels = in_channels
self.feat_channels = feat_channels
self.out_channels_raw = out_channels
@@ -213,7 +213,7 @@ class KernelUpdateHead(nn.Module):
out_channels=256,
act_cfg=dict(type='ReLU', inplace=True),
norm_cfg=dict(type='LN'))):
- super(KernelUpdateHead, self).__init__()
+ super().__init__()
self.num_classes = num_classes
self.in_channels = in_channels
self.out_channels = out_channels
diff --git a/mmseg/models/decode_heads/lraspp_head.py b/mmseg/models/decode_heads/lraspp_head.py
index f8c693bc4..ba2465f27 100644
--- a/mmseg/models/decode_heads/lraspp_head.py
+++ b/mmseg/models/decode_heads/lraspp_head.py
@@ -22,7 +22,7 @@ class LRASPPHead(BaseDecodeHead):
"""
def __init__(self, branch_channels=(32, 64), **kwargs):
- super(LRASPPHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
if self.input_transform != 'multiple_select':
raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform '
f'must be \'multiple_select\'. But received '
diff --git a/mmseg/models/decode_heads/nl_head.py b/mmseg/models/decode_heads/nl_head.py
index 7903f1ace..0ffcc2a2f 100644
--- a/mmseg/models/decode_heads/nl_head.py
+++ b/mmseg/models/decode_heads/nl_head.py
@@ -26,7 +26,7 @@ class NLHead(FCNHead):
use_scale=True,
mode='embedded_gaussian',
**kwargs):
- super(NLHead, self).__init__(num_convs=2, **kwargs)
+ super().__init__(num_convs=2, **kwargs)
self.reduction = reduction
self.use_scale = use_scale
self.mode = mode
diff --git a/mmseg/models/decode_heads/ocr_head.py b/mmseg/models/decode_heads/ocr_head.py
index 57d91613d..9afe37beb 100644
--- a/mmseg/models/decode_heads/ocr_head.py
+++ b/mmseg/models/decode_heads/ocr_head.py
@@ -18,7 +18,7 @@ class SpatialGatherModule(nn.Module):
"""
def __init__(self, scale):
- super(SpatialGatherModule, self).__init__()
+ super().__init__()
self.scale = scale
def forward(self, feats, probs):
@@ -46,7 +46,7 @@ class ObjectAttentionBlock(_SelfAttentionBlock):
query_downsample = nn.MaxPool2d(kernel_size=scale)
else:
query_downsample = None
- super(ObjectAttentionBlock, self).__init__(
+ super().__init__(
key_in_channels=in_channels,
query_in_channels=in_channels,
channels=channels,
@@ -73,8 +73,7 @@ class ObjectAttentionBlock(_SelfAttentionBlock):
def forward(self, query_feats, key_feats):
"""Forward function."""
- context = super(ObjectAttentionBlock,
- self).forward(query_feats, key_feats)
+ context = super().forward(query_feats, key_feats)
output = self.bottleneck(torch.cat([context, query_feats], dim=1))
if self.query_downsample is not None:
output = resize(query_feats)
@@ -96,7 +95,7 @@ class OCRHead(BaseCascadeDecodeHead):
"""
def __init__(self, ocr_channels, scale=1, **kwargs):
- super(OCRHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.ocr_channels = ocr_channels
self.scale = scale
self.object_context_block = ObjectAttentionBlock(
diff --git a/mmseg/models/decode_heads/point_head.py b/mmseg/models/decode_heads/point_head.py
index a57d531bd..e8e433d66 100644
--- a/mmseg/models/decode_heads/point_head.py
+++ b/mmseg/models/decode_heads/point_head.py
@@ -74,7 +74,7 @@ class PointHead(BaseCascadeDecodeHead):
norm_cfg=None,
act_cfg=dict(type='ReLU', inplace=False),
**kwargs):
- super(PointHead, self).__init__(
+ super().__init__(
input_transform='multiple_select',
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
diff --git a/mmseg/models/decode_heads/psa_head.py b/mmseg/models/decode_heads/psa_head.py
index cf3749f9c..13ee5c58a 100644
--- a/mmseg/models/decode_heads/psa_head.py
+++ b/mmseg/models/decode_heads/psa_head.py
@@ -43,7 +43,7 @@ class PSAHead(BaseDecodeHead):
**kwargs):
if PSAMask is None:
raise RuntimeError('Please install mmcv-full for PSAMask ops')
- super(PSAHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
assert psa_type in ['collect', 'distribute', 'bi-direction']
self.psa_type = psa_type
self.compact = compact
diff --git a/mmseg/models/decode_heads/psp_head.py b/mmseg/models/decode_heads/psp_head.py
index c22337fde..a40ec41de 100644
--- a/mmseg/models/decode_heads/psp_head.py
+++ b/mmseg/models/decode_heads/psp_head.py
@@ -24,7 +24,7 @@ class PPM(nn.ModuleList):
def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg,
act_cfg, align_corners, **kwargs):
- super(PPM, self).__init__()
+ super().__init__()
self.pool_scales = pool_scales
self.align_corners = align_corners
self.in_channels = in_channels
@@ -72,7 +72,7 @@ class PSPHead(BaseDecodeHead):
"""
def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
- super(PSPHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
assert isinstance(pool_scales, (list, tuple))
self.pool_scales = pool_scales
self.psp_modules = PPM(
diff --git a/mmseg/models/decode_heads/segmenter_mask_head.py b/mmseg/models/decode_heads/segmenter_mask_head.py
index bcb72ba36..85d27735b 100644
--- a/mmseg/models/decode_heads/segmenter_mask_head.py
+++ b/mmseg/models/decode_heads/segmenter_mask_head.py
@@ -61,8 +61,7 @@ class SegmenterMaskTransformerHead(BaseDecodeHead):
init_std=0.02,
**kwargs,
):
- super(SegmenterMaskTransformerHead, self).__init__(
- in_channels=in_channels, **kwargs)
+ super().__init__(in_channels=in_channels, **kwargs)
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]
self.layers = ModuleList()
diff --git a/mmseg/models/decode_heads/sep_aspp_head.py b/mmseg/models/decode_heads/sep_aspp_head.py
index a1ffae708..9dba68c9e 100644
--- a/mmseg/models/decode_heads/sep_aspp_head.py
+++ b/mmseg/models/decode_heads/sep_aspp_head.py
@@ -13,7 +13,7 @@ class DepthwiseSeparableASPPModule(ASPPModule):
conv."""
def __init__(self, **kwargs):
- super(DepthwiseSeparableASPPModule, self).__init__(**kwargs)
+ super().__init__(**kwargs)
for i, dilation in enumerate(self.dilations):
if dilation > 1:
self[i] = DepthwiseSeparableConvModule(
@@ -41,7 +41,7 @@ class DepthwiseSeparableASPPHead(ASPPHead):
"""
def __init__(self, c1_in_channels, c1_channels, **kwargs):
- super(DepthwiseSeparableASPPHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
assert c1_in_channels >= 0
self.aspp_modules = DepthwiseSeparableASPPModule(
dilations=self.dilations,
diff --git a/mmseg/models/decode_heads/sep_fcn_head.py b/mmseg/models/decode_heads/sep_fcn_head.py
index 5c8b79bd0..3b15983bc 100644
--- a/mmseg/models/decode_heads/sep_fcn_head.py
+++ b/mmseg/models/decode_heads/sep_fcn_head.py
@@ -32,7 +32,7 @@ class DepthwiseSeparableFCNHead(FCNHead):
"""
def __init__(self, dw_act_cfg=None, **kwargs):
- super(DepthwiseSeparableFCNHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.convs[0] = DepthwiseSeparableConvModule(
self.in_channels,
self.channels,
diff --git a/mmseg/models/decode_heads/setr_mla_head.py b/mmseg/models/decode_heads/setr_mla_head.py
index 887e11bc2..1975991a6 100644
--- a/mmseg/models/decode_heads/setr_mla_head.py
+++ b/mmseg/models/decode_heads/setr_mla_head.py
@@ -21,8 +21,7 @@ class SETRMLAHead(BaseDecodeHead):
"""
def __init__(self, mla_channels=128, up_scale=4, **kwargs):
- super(SETRMLAHead, self).__init__(
- input_transform='multiple_select', **kwargs)
+ super().__init__(input_transform='multiple_select', **kwargs)
self.mla_channels = mla_channels
num_inputs = len(self.in_channels)
diff --git a/mmseg/models/decode_heads/setr_up_head.py b/mmseg/models/decode_heads/setr_up_head.py
index 151e9ebce..9c796d816 100644
--- a/mmseg/models/decode_heads/setr_up_head.py
+++ b/mmseg/models/decode_heads/setr_up_head.py
@@ -41,7 +41,7 @@ class SETRUPHead(BaseDecodeHead):
assert kernel_size in [1, 3], 'kernel_size must be 1 or 3.'
- super(SETRUPHead, self).__init__(init_cfg=init_cfg, **kwargs)
+ super().__init__(init_cfg=init_cfg, **kwargs)
assert isinstance(self.in_channels, int)
diff --git a/mmseg/models/decode_heads/stdc_head.py b/mmseg/models/decode_heads/stdc_head.py
index 0314a0ebd..1c1c21e30 100644
--- a/mmseg/models/decode_heads/stdc_head.py
+++ b/mmseg/models/decode_heads/stdc_head.py
@@ -21,7 +21,7 @@ class STDCHead(FCNHead):
"""
def __init__(self, boundary_threshold=0.1, **kwargs):
- super(STDCHead, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.boundary_threshold = boundary_threshold
# Using register buffer to make laplacian kernel on the same
# device of `seg_label`.
@@ -93,6 +93,5 @@ class STDCHead(FCNHead):
seg_data_sample.gt_sem_seg = PixelData(data=label)
batch_sample_list.append(seg_data_sample)
- loss = super(STDCHead, self).loss_by_feat(seg_logits,
- batch_sample_list)
+ loss = super().loss_by_feat(seg_logits, batch_sample_list)
return loss
diff --git a/mmseg/models/decode_heads/uper_head.py b/mmseg/models/decode_heads/uper_head.py
index 651236b20..b1ccc3173 100644
--- a/mmseg/models/decode_heads/uper_head.py
+++ b/mmseg/models/decode_heads/uper_head.py
@@ -22,8 +22,7 @@ class UPerHead(BaseDecodeHead):
"""
def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
- super(UPerHead, self).__init__(
- input_transform='multiple_select', **kwargs)
+ super().__init__(input_transform='multiple_select', **kwargs)
# PSP Module
self.psp_modules = PPM(
pool_scales,
diff --git a/mmseg/models/losses/cross_entropy_loss.py b/mmseg/models/losses/cross_entropy_loss.py
index e607248fb..770b99748 100644
--- a/mmseg/models/losses/cross_entropy_loss.py
+++ b/mmseg/models/losses/cross_entropy_loss.py
@@ -223,7 +223,7 @@ class CrossEntropyLoss(nn.Module):
loss_weight=1.0,
loss_name='loss_ce',
avg_non_ignore=False):
- super(CrossEntropyLoss, self).__init__()
+ super().__init__()
assert (use_sigmoid is False) or (use_mask is False)
self.use_sigmoid = use_sigmoid
self.use_mask = use_mask
diff --git a/mmseg/models/losses/dice_loss.py b/mmseg/models/losses/dice_loss.py
index 4a98aaee9..2ee89a81f 100644
--- a/mmseg/models/losses/dice_loss.py
+++ b/mmseg/models/losses/dice_loss.py
@@ -80,7 +80,7 @@ class DiceLoss(nn.Module):
ignore_index=255,
loss_name='loss_dice',
**kwards):
- super(DiceLoss, self).__init__()
+ super().__init__()
self.smooth = smooth
self.exponent = exponent
self.reduction = reduction
diff --git a/mmseg/models/losses/focal_loss.py b/mmseg/models/losses/focal_loss.py
index cd7eff4f6..104d6602c 100644
--- a/mmseg/models/losses/focal_loss.py
+++ b/mmseg/models/losses/focal_loss.py
@@ -78,7 +78,7 @@ def sigmoid_focal_loss(pred,
valid_mask=None,
reduction='mean',
avg_factor=None):
- r"""A warpper of cuda version `Focal Loss
+ r"""A wrapper of cuda version `Focal Loss
`_.
Args:
pred (torch.Tensor): The prediction with shape (N, C), C is the number
@@ -172,7 +172,7 @@ class FocalLoss(nn.Module):
loss item to be included into the backward graph, `loss_` must
be the prefix of the name. Defaults to 'loss_focal'.
"""
- super(FocalLoss, self).__init__()
+ super().__init__()
assert use_sigmoid is True, \
'AssertionError: Only sigmoid focal loss supported now.'
assert reduction in ('none', 'mean', 'sum'), \
diff --git a/mmseg/models/losses/lovasz_loss.py b/mmseg/models/losses/lovasz_loss.py
index e63a0ab8f..b47f9d8a1 100644
--- a/mmseg/models/losses/lovasz_loss.py
+++ b/mmseg/models/losses/lovasz_loss.py
@@ -257,7 +257,7 @@ class LovaszLoss(nn.Module):
class_weight=None,
loss_weight=1.0,
loss_name='loss_lovasz'):
- super(LovaszLoss, self).__init__()
+ super().__init__()
assert loss_type in ('binary', 'multi_class'), "loss_type should be \
'binary' or 'multi_class'."
diff --git a/mmseg/models/losses/tversky_loss.py b/mmseg/models/losses/tversky_loss.py
index 4ad14f783..bfca1af66 100644
--- a/mmseg/models/losses/tversky_loss.py
+++ b/mmseg/models/losses/tversky_loss.py
@@ -88,7 +88,7 @@ class TverskyLoss(nn.Module):
alpha=0.3,
beta=0.7,
loss_name='loss_tversky'):
- super(TverskyLoss, self).__init__()
+ super().__init__()
self.smooth = smooth
self.class_weight = get_class_weight(class_weight)
self.loss_weight = loss_weight
diff --git a/mmseg/models/necks/featurepyramid.py b/mmseg/models/necks/featurepyramid.py
index 40453653d..dc1250d39 100644
--- a/mmseg/models/necks/featurepyramid.py
+++ b/mmseg/models/necks/featurepyramid.py
@@ -23,7 +23,7 @@ class Feature2Pyramid(nn.Module):
embed_dim,
rescales=[4, 2, 1, 0.5],
norm_cfg=dict(type='SyncBN', requires_grad=True)):
- super(Feature2Pyramid, self).__init__()
+ super().__init__()
self.rescales = rescales
self.upsample_4x = None
for k in self.rescales:
diff --git a/mmseg/models/necks/fpn.py b/mmseg/models/necks/fpn.py
index 98fb650f5..ddab74c00 100644
--- a/mmseg/models/necks/fpn.py
+++ b/mmseg/models/necks/fpn.py
@@ -80,7 +80,7 @@ class FPN(BaseModule):
upsample_cfg=dict(mode='nearest'),
init_cfg=dict(
type='Xavier', layer='Conv2d', distribution='uniform')):
- super(FPN, self).__init__(init_cfg)
+ super().__init__(init_cfg)
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
diff --git a/mmseg/models/necks/ic_neck.py b/mmseg/models/necks/ic_neck.py
index c554d227b..9763541e0 100644
--- a/mmseg/models/necks/ic_neck.py
+++ b/mmseg/models/necks/ic_neck.py
@@ -42,7 +42,7 @@ class CascadeFeatureFusion(BaseModule):
act_cfg=dict(type='ReLU'),
align_corners=False,
init_cfg=None):
- super(CascadeFeatureFusion, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.align_corners = align_corners
self.conv_low = ConvModule(
low_channels,
@@ -108,7 +108,7 @@ class ICNeck(BaseModule):
act_cfg=dict(type='ReLU'),
align_corners=False,
init_cfg=None):
- super(ICNeck, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert len(in_channels) == 3, 'Length of input channels \
must be 3!'
diff --git a/mmseg/models/necks/jpu.py b/mmseg/models/necks/jpu.py
index 6aad895c3..3ea0fe218 100644
--- a/mmseg/models/necks/jpu.py
+++ b/mmseg/models/necks/jpu.py
@@ -51,7 +51,7 @@ class JPU(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
- super(JPU, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
assert isinstance(in_channels, tuple)
assert isinstance(dilations, tuple)
self.in_channels = in_channels
diff --git a/mmseg/models/necks/mla_neck.py b/mmseg/models/necks/mla_neck.py
index 64a409239..db250aefb 100644
--- a/mmseg/models/necks/mla_neck.py
+++ b/mmseg/models/necks/mla_neck.py
@@ -12,7 +12,7 @@ class MLAModule(nn.Module):
out_channels=256,
norm_cfg=None,
act_cfg=None):
- super(MLAModule, self).__init__()
+ super().__init__()
self.channel_proj = nn.ModuleList()
for i in range(len(in_channels)):
self.channel_proj.append(
@@ -83,7 +83,7 @@ class MLANeck(nn.Module):
norm_layer=dict(type='LN', eps=1e-6, requires_grad=True),
norm_cfg=None,
act_cfg=None):
- super(MLANeck, self).__init__()
+ super().__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
diff --git a/mmseg/models/necks/multilevel_neck.py b/mmseg/models/necks/multilevel_neck.py
index 3f76c5e6a..c997125f2 100644
--- a/mmseg/models/necks/multilevel_neck.py
+++ b/mmseg/models/necks/multilevel_neck.py
@@ -29,7 +29,7 @@ class MultiLevelNeck(nn.Module):
scales=[0.5, 1, 2, 4],
norm_cfg=None,
act_cfg=None):
- super(MultiLevelNeck, self).__init__()
+ super().__init__()
assert isinstance(in_channels, list)
self.in_channels = in_channels
self.out_channels = out_channels
diff --git a/mmseg/models/segmentors/base.py b/mmseg/models/segmentors/base.py
index d303bdec4..dfceddd99 100644
--- a/mmseg/models/segmentors/base.py
+++ b/mmseg/models/segmentors/base.py
@@ -27,7 +27,7 @@ class BaseSegmentor(BaseModel, metaclass=ABCMeta):
def __init__(self,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None):
- super(BaseSegmentor, self).__init__(
+ super().__init__(
data_preprocessor=data_preprocessor, init_cfg=init_cfg)
@property
@@ -148,11 +148,9 @@ class BaseSegmentor(BaseModel, metaclass=ABCMeta):
segmentation before normalization.
"""
batch_size, C, H, W = seg_logits.shape
- assert C > 1, ('This post processes does not binary segmentation, and '
- f'channels `seg_logtis` must be > 1 but got {C}')
if data_samples is None:
- data_samples = []
+ data_samples = [SegDataSample() for _ in range(batch_size)]
only_prediction = True
else:
only_prediction = False
@@ -174,23 +172,19 @@ class BaseSegmentor(BaseModel, metaclass=ABCMeta):
mode='bilinear',
align_corners=self.align_corners,
warning=False).squeeze(0)
- # i_seg_logits shape is C, H, W with original shape
- i_seg_pred = i_seg_logits.argmax(dim=0, keepdim=True)
- data_samples[i].set_data({
- 'seg_logits':
- PixelData(**{'data': i_seg_logits}),
- 'pred_sem_seg':
- PixelData(**{'data': i_seg_pred})
- })
else:
i_seg_logits = seg_logits[i]
+
+ if C > 1:
i_seg_pred = i_seg_logits.argmax(dim=0, keepdim=True)
- prediction = SegDataSample()
- prediction.set_data({
- 'seg_logits':
- PixelData(**{'data': i_seg_logits}),
- 'pred_sem_seg':
- PixelData(**{'data': i_seg_pred})
- })
- data_samples.append(prediction)
+ else:
+ i_seg_pred = (i_seg_logits >
+ self.decode_head.threshold).to(i_seg_logits)
+ data_samples[i].set_data({
+ 'seg_logits':
+ PixelData(**{'data': i_seg_logits}),
+ 'pred_sem_seg':
+ PixelData(**{'data': i_seg_pred})
+ })
+
return data_samples
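The prediction branch above reduces to a switch on the channel count. A standalone sketch of the same logic, assuming per-image logits of shape (C, H, W) and the `threshold` attribute introduced in `BaseDecodeHead`:

import torch

def logits_to_pred(i_seg_logits: torch.Tensor, threshold: float = 0.3):
    # Multi-class logits: argmax over the channel dimension.
    if i_seg_logits.shape[0] > 1:
        return i_seg_logits.argmax(dim=0, keepdim=True)
    # Single-channel (binary) logits: hard-threshold, keep the dtype.
    return (i_seg_logits > threshold).to(i_seg_logits)

print(logits_to_pred(torch.randn(1, 4, 4)))  # binary case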
diff --git a/mmseg/models/segmentors/cascade_encoder_decoder.py b/mmseg/models/segmentors/cascade_encoder_decoder.py
index 33bcbddd8..f76e66f93 100644
--- a/mmseg/models/segmentors/cascade_encoder_decoder.py
+++ b/mmseg/models/segmentors/cascade_encoder_decoder.py
@@ -48,7 +48,7 @@ class CascadeEncoderDecoder(EncoderDecoder):
pretrained: Optional[str] = None,
init_cfg: OptMultiConfig = None):
self.num_stages = num_stages
- super(CascadeEncoderDecoder, self).__init__(
+ super().__init__(
backbone=backbone,
decode_head=decode_head,
neck=neck,
diff --git a/mmseg/models/segmentors/encoder_decoder.py b/mmseg/models/segmentors/encoder_decoder.py
index 0f678957d..c4f44ba00 100644
--- a/mmseg/models/segmentors/encoder_decoder.py
+++ b/mmseg/models/segmentors/encoder_decoder.py
@@ -78,7 +78,7 @@ class EncoderDecoder(BaseSegmentor):
data_preprocessor: OptConfigType = None,
pretrained: Optional[str] = None,
init_cfg: OptMultiConfig = None):
- super(EncoderDecoder, self).__init__(
+ super().__init__(
data_preprocessor=data_preprocessor, init_cfg=init_cfg)
if pretrained is not None:
assert backbone.get('pretrained') is None, \
@@ -100,6 +100,7 @@ class EncoderDecoder(BaseSegmentor):
self.decode_head = MODELS.build(decode_head)
self.align_corners = self.decode_head.align_corners
self.num_classes = self.decode_head.num_classes
+ self.out_channels = self.decode_head.out_channels
def _init_auxiliary_head(self, auxiliary_head: ConfigType) -> None:
"""Initialize ``auxiliary_head``"""
diff --git a/mmseg/models/utils/embed.py b/mmseg/models/utils/embed.py
index adabcf29c..aef0a40b0 100644
--- a/mmseg/models/utils/embed.py
+++ b/mmseg/models/utils/embed.py
@@ -42,7 +42,7 @@ class AdaptivePadding(nn.Module):
def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):
- super(AdaptivePadding, self).__init__()
+ super().__init__()
assert padding in ('same', 'corner')
@@ -120,7 +120,7 @@ class PatchEmbed(BaseModule):
norm_cfg=None,
input_size=None,
init_cfg=None):
- super(PatchEmbed, self).__init__(init_cfg=init_cfg)
+ super().__init__(init_cfg=init_cfg)
self.embed_dims = embed_dims
if stride is None:
diff --git a/mmseg/models/utils/encoding.py b/mmseg/models/utils/encoding.py
index f397cc54e..ee4f0574f 100644
--- a/mmseg/models/utils/encoding.py
+++ b/mmseg/models/utils/encoding.py
@@ -16,7 +16,7 @@ class Encoding(nn.Module):
"""
def __init__(self, channels, num_codes):
- super(Encoding, self).__init__()
+ super().__init__()
# init codewords and smoothing factor
self.channels, self.num_codes = channels, num_codes
std = 1. / ((num_codes * channels)**0.5)
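For reference, the relocated `Encoding` layer maps (B, C, H, W) features to (B, num_codes, C) residual encodings. A usage sketch, assuming the class remains exported from `mmseg.models.utils`:

import torch
from mmseg.models.utils import Encoding

enc = Encoding(channels=32, num_codes=8)
x = torch.randn(2, 32, 16, 16)  # (B, C, H, W)
out = enc(x)
print(out.shape)  # torch.Size([2, 8, 32])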
diff --git a/mmseg/models/utils/inverted_residual.py b/mmseg/models/utils/inverted_residual.py
index c9cda7682..56190b3bf 100644
--- a/mmseg/models/utils/inverted_residual.py
+++ b/mmseg/models/utils/inverted_residual.py
@@ -40,7 +40,7 @@ class InvertedResidual(nn.Module):
act_cfg=dict(type='ReLU6'),
with_cp=False,
**kwargs):
- super(InvertedResidual, self).__init__()
+ super().__init__()
self.stride = stride
assert stride in [1, 2], f'stride must be in [1, 2]. ' \
f'But received {stride}.'
@@ -138,7 +138,7 @@ class InvertedResidualV3(nn.Module):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
with_cp=False):
- super(InvertedResidualV3, self).__init__()
+ super().__init__()
self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
assert stride in [1, 2]
self.with_cp = with_cp
diff --git a/mmseg/models/utils/res_layer.py b/mmseg/models/utils/res_layer.py
index 2847d189b..3dd7a6f75 100644
--- a/mmseg/models/utils/res_layer.py
+++ b/mmseg/models/utils/res_layer.py
@@ -93,4 +93,4 @@ class ResLayer(Sequential):
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
**kwargs))
- super(ResLayer, self).__init__(*layers)
+ super().__init__(*layers)
diff --git a/mmseg/models/utils/se_layer.py b/mmseg/models/utils/se_layer.py
index 34085753e..0ff632cfe 100644
--- a/mmseg/models/utils/se_layer.py
+++ b/mmseg/models/utils/se_layer.py
@@ -30,7 +30,7 @@ class SELayer(nn.Module):
conv_cfg=None,
act_cfg=(dict(type='ReLU'),
dict(type='HSigmoid', bias=3.0, divisor=6.0))):
- super(SELayer, self).__init__()
+ super().__init__()
if isinstance(act_cfg, dict):
act_cfg = (act_cfg, act_cfg)
assert len(act_cfg) == 2
diff --git a/mmseg/models/utils/self_attention_block.py b/mmseg/models/utils/self_attention_block.py
index 245de62b1..5bb6e8284 100644
--- a/mmseg/models/utils/self_attention_block.py
+++ b/mmseg/models/utils/self_attention_block.py
@@ -36,7 +36,7 @@ class SelfAttentionBlock(nn.Module):
key_downsample, key_query_num_convs, value_out_num_convs,
key_query_norm, value_out_norm, matmul_norm, with_out,
conv_cfg, norm_cfg, act_cfg):
- super(SelfAttentionBlock, self).__init__()
+ super().__init__()
if share_key_query:
assert key_in_channels == query_in_channels
self.key_in_channels = key_in_channels
diff --git a/mmseg/models/utils/up_conv_block.py b/mmseg/models/utils/up_conv_block.py
index d8396d9c2..4fa3b598d 100644
--- a/mmseg/models/utils/up_conv_block.py
+++ b/mmseg/models/utils/up_conv_block.py
@@ -57,7 +57,7 @@ class UpConvBlock(nn.Module):
upsample_cfg=dict(type='InterpConv'),
dcn=None,
plugins=None):
- super(UpConvBlock, self).__init__()
+ super().__init__()
assert dcn is None, 'Not implemented yet.'
assert plugins is None, 'Not implemented yet.'
diff --git a/mmseg/models/utils/wrappers.py b/mmseg/models/utils/wrappers.py
index ce67e4beb..abbd0c029 100644
--- a/mmseg/models/utils/wrappers.py
+++ b/mmseg/models/utils/wrappers.py
@@ -34,7 +34,7 @@ class Upsample(nn.Module):
scale_factor=None,
mode='nearest',
align_corners=None):
- super(Upsample, self).__init__()
+ super().__init__()
self.size = size
if isinstance(scale_factor, tuple):
self.scale_factor = tuple(float(factor) for factor in scale_factor)
diff --git a/mmseg/ops/encoding.py b/mmseg/ops/encoding.py
deleted file mode 100644
index f397cc54e..000000000
--- a/mmseg/ops/encoding.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import torch
-from torch import nn
-from torch.nn import functional as F
-
-
-class Encoding(nn.Module):
- """Encoding Layer: a learnable residual encoder.
-
- Input is of shape (batch_size, channels, height, width).
- Output is of shape (batch_size, num_codes, channels).
-
- Args:
- channels: dimension of the features or feature channels
- num_codes: number of code words
- """
-
- def __init__(self, channels, num_codes):
- super(Encoding, self).__init__()
- # init codewords and smoothing factor
- self.channels, self.num_codes = channels, num_codes
- std = 1. / ((num_codes * channels)**0.5)
- # [num_codes, channels]
- self.codewords = nn.Parameter(
- torch.empty(num_codes, channels,
- dtype=torch.float).uniform_(-std, std),
- requires_grad=True)
- # [num_codes]
- self.scale = nn.Parameter(
- torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0),
- requires_grad=True)
-
- @staticmethod
- def scaled_l2(x, codewords, scale):
- num_codes, channels = codewords.size()
- batch_size = x.size(0)
- reshaped_scale = scale.view((1, 1, num_codes))
- expanded_x = x.unsqueeze(2).expand(
- (batch_size, x.size(1), num_codes, channels))
- reshaped_codewords = codewords.view((1, 1, num_codes, channels))
-
- scaled_l2_norm = reshaped_scale * (
- expanded_x - reshaped_codewords).pow(2).sum(dim=3)
- return scaled_l2_norm
-
- @staticmethod
- def aggregate(assignment_weights, x, codewords):
- num_codes, channels = codewords.size()
- reshaped_codewords = codewords.view((1, 1, num_codes, channels))
- batch_size = x.size(0)
-
- expanded_x = x.unsqueeze(2).expand(
- (batch_size, x.size(1), num_codes, channels))
- encoded_feat = (assignment_weights.unsqueeze(3) *
- (expanded_x - reshaped_codewords)).sum(dim=1)
- return encoded_feat
-
- def forward(self, x):
- assert x.dim() == 4 and x.size(1) == self.channels
- # [batch_size, channels, height, width]
- batch_size = x.size(0)
- # [batch_size, height x width, channels]
- x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous()
- # assignment_weights: [batch_size, channels, num_codes]
- assignment_weights = F.softmax(
- self.scaled_l2(x, self.codewords, self.scale), dim=2)
- # aggregate
- encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
- return encoded_feat
-
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \
- f'x{self.channels})'
- return repr_str
diff --git a/mmseg/ops/wrappers.py b/mmseg/ops/wrappers.py
deleted file mode 100644
index ce67e4beb..000000000
--- a/mmseg/ops/wrappers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import warnings
-
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-def resize(input,
- size=None,
- scale_factor=None,
- mode='nearest',
- align_corners=None,
- warning=True):
- if warning:
- if size is not None and align_corners:
- input_h, input_w = tuple(int(x) for x in input.shape[2:])
- output_h, output_w = tuple(int(x) for x in size)
- if output_h > input_h or output_w > output_h:
- if ((output_h > 1 and output_w > 1 and input_h > 1
- and input_w > 1) and (output_h - 1) % (input_h - 1)
- and (output_w - 1) % (input_w - 1)):
- warnings.warn(
- f'When align_corners={align_corners}, '
- 'the output would more aligned if '
- f'input size {(input_h, input_w)} is `x+1` and '
- f'out size {(output_h, output_w)} is `nx+1`')
- return F.interpolate(input, size, scale_factor, mode, align_corners)
-
-
-class Upsample(nn.Module):
-
- def __init__(self,
- size=None,
- scale_factor=None,
- mode='nearest',
- align_corners=None):
- super(Upsample, self).__init__()
- self.size = size
- if isinstance(scale_factor, tuple):
- self.scale_factor = tuple(float(factor) for factor in scale_factor)
- else:
- self.scale_factor = float(scale_factor) if scale_factor else None
- self.mode = mode
- self.align_corners = align_corners
-
- def forward(self, x):
- if not self.size:
- size = [int(t * self.scale_factor) for t in x.shape[-2:]]
- else:
- size = self.size
- return resize(x, size, None, self.mode, self.align_corners)
diff --git a/mmseg/structures/sampler/ohem_pixel_sampler.py b/mmseg/structures/sampler/ohem_pixel_sampler.py
index e5016ffb6..a974273ca 100644
--- a/mmseg/structures/sampler/ohem_pixel_sampler.py
+++ b/mmseg/structures/sampler/ohem_pixel_sampler.py
@@ -23,7 +23,7 @@ class OHEMPixelSampler(BasePixelSampler):
"""
def __init__(self, context, thresh=None, min_kept=100000):
- super(OHEMPixelSampler, self).__init__()
+ super().__init__()
self.context = context
assert min_kept > 1
self.thresh = thresh
diff --git a/mmseg/structures/seg_data_sample.py b/mmseg/structures/seg_data_sample.py
index d234a4f20..ce68b5474 100644
--- a/mmseg/structures/seg_data_sample.py
+++ b/mmseg/structures/seg_data_sample.py
@@ -16,7 +16,7 @@ class SegDataSample(BaseDataElement):
>>> import torch
>>> import numpy as np
>>> from mmengine.structures import PixelData
- >>> from mmseg.core import SegDataSample
+ >>> from mmseg.structures import SegDataSample
>>> data_sample = SegDataSample()
>>> img_meta = dict(img_shape=(4, 4, 3),
diff --git a/mmseg/utils/__init__.py b/mmseg/utils/__init__.py
index 3bb1ede52..0fd58218d 100644
--- a/mmseg/utils/__init__.py
+++ b/mmseg/utils/__init__.py
@@ -10,6 +10,7 @@ from .class_names import (ade_classes, ade_palette, cityscapes_classes,
voc_palette)
# yapf: enable
from .collect_env import collect_env
+from .io import datafrombytes
from .misc import add_prefix, stack_batch
from .set_env import register_all_modules
from .typing import (ConfigType, ForwardResults, MultiConfig, OptConfigType,
@@ -25,5 +26,6 @@ __all__ = [
'vaihingen_classes', 'isaid_classes', 'stare_classes',
'cityscapes_palette', 'ade_palette', 'voc_palette', 'cocostuff_palette',
'loveda_palette', 'potsdam_palette', 'vaihingen_palette', 'isaid_palette',
- 'stare_palette', 'dataset_aliases', 'get_classes', 'get_palette'
+ 'stare_palette', 'dataset_aliases', 'get_classes', 'get_palette',
+ 'datafrombytes'
]
diff --git a/mmseg/utils/class_names.py b/mmseg/utils/class_names.py
index dcfa42138..a62eaac97 100644
--- a/mmseg/utils/class_names.py
+++ b/mmseg/utils/class_names.py
@@ -265,6 +265,26 @@ def stare_palette():
return [[120, 120, 120], [6, 230, 230]]
+def lip_classes():
+ """LIP class names for external use."""
+ return [
+ 'background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes',
+ 'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt',
+ 'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe',
+ 'rightShoe'
+ ]
+
+
+def lip_palette():
+ """LIP palette for external use."""
+ return [[0, 0, 0], [128, 0, 0], [255, 0, 0], [0, 85, 0], [170, 0, 51],
+ [255, 85, 0], [0, 0, 85], [0, 119, 221], [85, 85, 0], [0, 85, 85],
+ [85, 51, 0], [52, 86, 128], [0, 128, 0], [0, 0, 255], [51, 170, 221],
+ [0, 255, 255], [85, 255, 170], [170, 255, 85], [255, 255, 0],
+ [255, 170, 0]]
+
+
dataset_aliases = {
'cityscapes': ['cityscapes'],
'ade': ['ade', 'ade20k'],
@@ -278,7 +298,8 @@ dataset_aliases = {
'coco_stuff164k'
],
'isaid': ['isaid', 'iSAID'],
- 'stare': ['stare', 'STARE']
+ 'stare': ['stare', 'STARE'],
+ 'lip': ['LIP', 'lip']
}
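With the alias registered, the existing lookup helpers resolve LIP by name. A small sketch, assuming `get_classes`/`get_palette` dispatch through `dataset_aliases` the same way as the existing entries:

from mmseg.utils import get_classes, get_palette

assert get_classes('lip') == get_classes('LIP')
print(len(get_classes('lip')))  # 20 classes
print(get_palette('lip')[:3])   # first three RGB triples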
diff --git a/mmseg/utils/collect_env.py b/mmseg/utils/collect_env.py
index 22b56992d..d5d6ea290 100644
--- a/mmseg/utils/collect_env.py
+++ b/mmseg/utils/collect_env.py
@@ -15,4 +15,4 @@ def collect_env():
if __name__ == '__main__':
for name, val in collect_env().items():
- print('{}: {}'.format(name, val))
+ print(f'{name}: {val}')
diff --git a/mmseg/utils/io.py b/mmseg/utils/io.py
new file mode 100644
index 000000000..d03517401
--- /dev/null
+++ b/mmseg/utils/io.py
@@ -0,0 +1,38 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import gzip
+import io
+import pickle
+
+import numpy as np
+
+
+def datafrombytes(content: bytes, backend: str = 'numpy') -> np.ndarray:
+ """Data decoding from bytes.
+
+ Args:
+ content (bytes): The data bytes got from files or other streams.
+ backend (str): The data decoding backend type. Options are 'numpy',
+ 'nifti' and 'pickle'. Defaults to 'numpy'.
+
+ Returns:
+ numpy.ndarray: Loaded data array.
+ """
+ if backend == 'pickle':
+ data = pickle.loads(content)
+ else:
+ with io.BytesIO(content) as f:
+ if backend == 'nifti':
+ f = gzip.open(f)
+ try:
+ from nibabel import FileHolder, Nifti1Image
+ except ImportError:
+ raise ImportError(
+ 'nifti files io depends on nibabel, please run '
+ '`pip install nibabel` to install it')
+ fh = FileHolder(fileobj=f)
+ data = Nifti1Image.from_file_map({'header': fh, 'image': fh})
+ data = Nifti1Image.from_bytes(data.to_bytes()).get_fdata()
+ elif backend == 'numpy':
+ data = np.load(f)
+ else:
+ raise ValueError(f'unsupported backend: {backend}')
+ return data
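A usage sketch for the new helper against the fixtures added in this PR; `nibabel` is required for the 'nifti' branch (see requirements/optional.txt):

from mmseg.utils import datafrombytes

with open('tests/data/biomedical.npy', 'rb') as f:
    array = datafrombytes(f.read(), backend='numpy')

with open('tests/data/biomedical.nii.gz', 'rb') as f:
    volume = datafrombytes(f.read(), backend='nifti')

print(array.shape, volume.shape)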
diff --git a/mmseg/utils/misc.py b/mmseg/utils/misc.py
index 89469ba41..4413234fb 100644
--- a/mmseg/utils/misc.py
+++ b/mmseg/utils/misc.py
@@ -52,12 +52,12 @@ def stack_batch(inputs: List[torch.Tensor],
"""
assert isinstance(inputs, list), \
f'Expected input type to be list, but got {type(inputs)}'
- assert len(set([tensor.ndim for tensor in inputs])) == 1, \
+ assert len({tensor.ndim for tensor in inputs}) == 1, \
f'Expected the dimensions of all inputs must be the same, ' \
f'but got {[tensor.ndim for tensor in inputs]}'
assert inputs[0].ndim == 3, f'Expected tensor dimension to be 3, ' \
f'but got {inputs[0].ndim}'
- assert len(set([tensor.shape[0] for tensor in inputs])) == 1, \
+ assert len({tensor.shape[0] for tensor in inputs}) == 1, \
f'Expected the channels of all inputs must be the same, ' \
f'but got {[tensor.shape[0] for tensor in inputs]}'
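The two assert rewrites are pure style: a set comprehension avoids the intermediate list that `set([...])` builds. A quick equivalence check:

import torch

tensors = [torch.zeros(3, 8, 8), torch.zeros(3, 4, 4)]
assert {t.ndim for t in tensors} == set([t.ndim for t in tensors]) == {3}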
diff --git a/mmseg/version.py b/mmseg/version.py
index 6ee77798f..840dca669 100644
--- a/mmseg/version.py
+++ b/mmseg/version.py
@@ -1,6 +1,6 @@
# Copyright (c) Open-MMLab. All rights reserved.
-__version__ = '1.0.0rc0'
+__version__ = '1.0.0rc1'
def parse_version_info(version_str):
diff --git a/mmseg/visualization/local_visualizer.py b/mmseg/visualization/local_visualizer.py
index fc34ea5d1..070b06b73 100644
--- a/mmseg/visualization/local_visualizer.py
+++ b/mmseg/visualization/local_visualizer.py
@@ -1,6 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple
+import mmcv
import numpy as np
from mmengine.dist import master_only
from mmengine.structures import PixelData
@@ -12,7 +13,7 @@ from mmseg.structures import SegDataSample
@VISUALIZERS.register_module()
class SegLocalVisualizer(Visualizer):
- """MMSegmentation Local Visualizer.
+ """Local Visualizer.
Args:
name (str): Name of the instance. Defaults to 'visualizer'.
@@ -99,15 +100,18 @@ class SegLocalVisualizer(Visualizer):
return self.get_image()
@master_only
- def add_datasample(self,
- name: str,
- image: np.ndarray,
- data_sample: Optional[SegDataSample] = None,
- draw_gt: bool = True,
- draw_pred: bool = True,
- show: bool = False,
- wait_time: float = 0,
- step: int = 0) -> None:
+ def add_datasample(
+ self,
+ name: str,
+ image: np.ndarray,
+ data_sample: Optional[SegDataSample] = None,
+ draw_gt: bool = True,
+ draw_pred: bool = True,
+ show: bool = False,
+ wait_time: float = 0,
+ # TODO: Support this in mmengine's Visualizer.
+ out_file: Optional[str] = None,
+ step: int = 0) -> None:
"""Draw datasample and save to all backends.
- If GT and prediction are plotted at the same time, they are
@@ -115,6 +119,9 @@ class SegLocalVisualizer(Visualizer):
ground truth and the right image is the prediction.
- If ``show`` is True, all storage backends are ignored, and
the images will be displayed in a local window.
+ - If ``out_file`` is specified, the drawn image will be
+ saved to ``out_file``. It is usually used when the display
+ is not available.
Args:
name (str): The image identifier.
@@ -128,6 +135,7 @@ class SegLocalVisualizer(Visualizer):
Defaults to True.
show (bool): Whether to display the drawn image. Default to False.
wait_time (float): The interval of show (s). Defaults to 0.
+ out_file (str): Path to output file. Defaults to None.
step (int): Global step value to record. Defaults to 0.
"""
classes = self.dataset_meta.get('classes', None)
@@ -166,5 +174,8 @@ class SegLocalVisualizer(Visualizer):
if show:
self.show(drawn_img, win_name=name, wait_time=wait_time)
+
+ if out_file is not None:
+ mmcv.imwrite(drawn_img, out_file)
else:
self.add_image(name, drawn_img, step)
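A headless usage sketch of the new `out_file` path, assuming `SegLocalVisualizer` is importable from `mmseg.visualization` and a prediction is already packed into a `SegDataSample`:

import numpy as np
import torch
from mmengine.structures import PixelData

from mmseg.structures import SegDataSample
from mmseg.visualization import SegLocalVisualizer

visualizer = SegLocalVisualizer(name='visualizer')
visualizer.dataset_meta = dict(
    classes=('background', 'foreground'),
    palette=[[120, 120, 120], [6, 230, 230]])

image = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
data_sample = SegDataSample()
data_sample.pred_sem_seg = PixelData(data=torch.randint(0, 2, (1, 64, 64)))

# show=False, so the drawn image goes to disk instead of a window.
visualizer.add_datasample(
    'demo', image, data_sample=data_sample, draw_gt=False,
    show=False, out_file='demo.png')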
diff --git a/model-index.yml b/model-index.yml
index 2053fd049..b087a7294 100644
--- a/model-index.yml
+++ b/model-index.yml
@@ -30,6 +30,7 @@ Import:
- configs/nonlocal_net/nonlocal_net.yml
- configs/ocrnet/ocrnet.yml
- configs/point_rend/point_rend.yml
+- configs/poolformer/poolformer.yml
- configs/psanet/psanet.yml
- configs/pspnet/pspnet.yml
- configs/resnest/resnest.yml
diff --git a/requirements/mminstall.txt b/requirements/mminstall.txt
index 68f60cdad..ef11eb677 100644
--- a/requirements/mminstall.txt
+++ b/requirements/mminstall.txt
@@ -1,3 +1,3 @@
mmcls>=1.0.0rc0
mmcv>=2.0.0rc1,<2.1.0
-mmengine>=0.1
+mmengine>=0.1.0,<1.0.0
diff --git a/requirements/optional.txt b/requirements/optional.txt
index 47fa59331..5eca64924 100644
--- a/requirements/optional.txt
+++ b/requirements/optional.txt
@@ -1 +1,2 @@
cityscapesscripts
+nibabel
diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt
index 22a894bd7..af6029b9a 100644
--- a/requirements/readthedocs.txt
+++ b/requirements/readthedocs.txt
@@ -1,4 +1,5 @@
-mmcv
+mmcv>=2.0.0rc0
+mmengine
prettytable
torch
torchvision
diff --git a/resources/cascade_encoder_decoder_dataflow.png b/resources/cascade_encoder_decoder_dataflow.png
new file mode 100644
index 000000000..28e33d052
Binary files /dev/null and b/resources/cascade_encoder_decoder_dataflow.png differ
diff --git a/resources/encoder_decoder_dataflow.png b/resources/encoder_decoder_dataflow.png
new file mode 100644
index 000000000..33a8a4916
Binary files /dev/null and b/resources/encoder_decoder_dataflow.png differ
diff --git a/resources/test_step.png b/resources/test_step.png
new file mode 100644
index 000000000..4d52351b8
Binary files /dev/null and b/resources/test_step.png differ
diff --git a/resources/train_step.png b/resources/train_step.png
new file mode 100644
index 000000000..1e06105a0
Binary files /dev/null and b/resources/train_step.png differ
diff --git a/setup.cfg b/setup.cfg
index 23cb09e69..dc5ea0711 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -16,4 +16,4 @@ default_section = THIRDPARTY
skip = *.po,*.ts,*.ipynb
count =
quiet-level = 3
-ignore-words-list = formating,sur,hist,dota
+ignore-words-list = formating,sur,hist,dota,warmup
diff --git a/setup.py b/setup.py
index 91afefb6e..79ecaa54b 100755
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ version_file = 'mmseg/version.py'
def get_version():
- with open(version_file, 'r') as f:
+ with open(version_file) as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']
@@ -74,12 +74,11 @@ def parse_requirements(fname='requirements.txt', with_version=True):
yield info
def parse_require_file(fpath):
- with open(fpath, 'r') as f:
+ with open(fpath) as f:
for line in f.readlines():
line = line.strip()
if line and not line.startswith('#'):
- for info in parse_line(line):
- yield info
+ yield from parse_line(line)
def gen_packages_items():
if exists(require_fpath):
@@ -195,6 +194,7 @@ if __name__ == '__main__':
'tests': parse_requirements('requirements/tests.txt'),
'build': parse_requirements('requirements/build.txt'),
'optional': parse_requirements('requirements/optional.txt'),
+ 'mim': parse_requirements('requirements/mminstall.txt'),
},
ext_modules=[],
zip_safe=False)
diff --git a/tests/data/biomedical.nii.gz b/tests/data/biomedical.nii.gz
new file mode 100755
index 000000000..32f3276d9
Binary files /dev/null and b/tests/data/biomedical.nii.gz differ
diff --git a/tests/data/biomedical.npy b/tests/data/biomedical.npy
new file mode 100644
index 000000000..481944493
Binary files /dev/null and b/tests/data/biomedical.npy differ
diff --git a/tests/data/biomedical.pkl b/tests/data/biomedical.pkl
new file mode 100644
index 000000000..48c32a7ce
Binary files /dev/null and b/tests/data/biomedical.pkl differ
diff --git a/tests/data/biomedical_ann.nii.gz b/tests/data/biomedical_ann.nii.gz
new file mode 100755
index 000000000..5eae8a4a4
Binary files /dev/null and b/tests/data/biomedical_ann.nii.gz differ
diff --git a/tests/data/dataset.json b/tests/data/dataset.json
new file mode 100755
index 000000000..09b01235e
--- /dev/null
+++ b/tests/data/dataset.json
@@ -0,0 +1,30 @@
+{
+ "name": "BRATS",
+ "description": "Gliomas segmentation tumour and oedema in on brain images",
+ "tensorImageSize": "4D",
+ "modality": {
+ "0": "FLAIR",
+ "1": "T1w",
+ "2": "t1gd",
+ "3": "T2w"
+ },
+ "labels": {
+ "0": "background",
+ "1": "edema",
+ "2": "non-enhancing tumor",
+ "3": "enhancing tumour"
+ },
+ "numTraining": 484,
+ "numTest": 266,
+ "training": [
+ {
+ "image": "./imagesTr/BRATS_457.nii.gz",
+ "label": "./labelsTr/BRATS_457.nii.gz"
+ }
+ ],
+ "test": [
+ "./imagesTs/BRATS_568.nii.gz",
+ "./imagesTs/BRATS_515.nii.gz",
+ "./imagesTs/BRATS_576.nii.gz"
+ ]
+}
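The fixture above mirrors the Medical Segmentation Decathlon layout: `training` pairs each image with a label, while `test` lists bare image paths, which is why the DecathlonDataset tests added below expect one training sample and three test samples. A minimal sketch of that parsing, assuming only the JSON layout shown here (`load_decathlon_split` is a hypothetical helper, not the dataset's actual implementation):

import json
import os.path as osp


def load_decathlon_split(data_root, ann_file='dataset.json', test_mode=False):
    """Hypothetical helper: yield sample dicts for one split."""
    with open(osp.join(data_root, ann_file)) as f:
        meta = json.load(f)
    if test_mode:
        # 'test' holds bare image paths, so no seg_map_path is attached
        return [dict(img_path=osp.join(data_root, p)) for p in meta['test']]
    return [
        dict(
            img_path=osp.join(data_root, item['image']),
            seg_map_path=osp.join(data_root, item['label']))
        for item in meta['training']
    ]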
diff --git a/tests/data/pseudo_lip_dataset/train_images/684_2150041.jpg b/tests/data/pseudo_lip_dataset/train_images/684_2150041.jpg
new file mode 100644
index 000000000..d6ac13a99
Binary files /dev/null and b/tests/data/pseudo_lip_dataset/train_images/684_2150041.jpg differ
diff --git a/tests/data/pseudo_lip_dataset/train_segmentations/684_2150041.png b/tests/data/pseudo_lip_dataset/train_segmentations/684_2150041.png
new file mode 100644
index 000000000..47271e2ca
Binary files /dev/null and b/tests/data/pseudo_lip_dataset/train_segmentations/684_2150041.png differ
diff --git a/tests/data/pseudo_lip_dataset/val_images/86_185913.jpg b/tests/data/pseudo_lip_dataset/val_images/86_185913.jpg
new file mode 100644
index 000000000..7f66845a7
Binary files /dev/null and b/tests/data/pseudo_lip_dataset/val_images/86_185913.jpg differ
diff --git a/tests/data/pseudo_lip_dataset/val_segmentations/86_185913.png b/tests/data/pseudo_lip_dataset/val_segmentations/86_185913.png
new file mode 100644
index 000000000..0708e5390
Binary files /dev/null and b/tests/data/pseudo_lip_dataset/val_segmentations/86_185913.png differ
diff --git a/tests/test_config.py b/tests/test_config.py
index d644a34ba..bd664ed74 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -31,7 +31,7 @@ def test_config_build_segmentor():
"""Test that all segmentation models defined in the configs can be
initialized."""
config_dpath = _get_config_directory()
- print('Found config_dpath = {!r}'.format(config_dpath))
+ print(f'Found config_dpath = {config_dpath!r}')
config_fpaths = []
# one config each sub folder
@@ -42,20 +42,20 @@ def test_config_build_segmentor():
config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
config_names = [relpath(p, config_dpath) for p in config_fpaths]
- print('Using {} config files'.format(len(config_names)))
+ print(f'Using {len(config_names)} config files')
for config_fname in config_names:
config_fpath = join(config_dpath, config_fname)
config_mod = Config.fromfile(config_fpath)
config_mod.model
- print('Building segmentor, config_fpath = {!r}'.format(config_fpath))
+ print(f'Building segmentor, config_fpath = {config_fpath!r}')
# Remove pretrained keys to allow for testing in an offline environment
if 'pretrained' in config_mod.model:
config_mod.model['pretrained'] = None
- print('building {}'.format(config_fname))
+ print(f'building {config_fname}')
segmentor = build_segmentor(config_mod.model)
assert segmentor is not None
@@ -72,28 +72,27 @@ def test_config_data_pipeline():
register_all_modules()
config_dpath = _get_config_directory()
- print('Found config_dpath = {!r}'.format(config_dpath))
+ print(f'Found config_dpath = {config_dpath!r}')
import glob
config_fpaths = list(glob.glob(join(config_dpath, '**', '*.py')))
config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
config_names = [relpath(p, config_dpath) for p in config_fpaths]
- print('Using {} config files'.format(len(config_names)))
+ print(f'Using {len(config_names)} config files')
for config_fname in config_names:
config_fpath = join(config_dpath, config_fname)
- print(
- 'Building data pipeline, config_fpath = {!r}'.format(config_fpath))
+ print(f'Building data pipeline, config_fpath = {config_fpath!r}')
config_mod = Config.fromfile(config_fpath)
# remove loading pipeline
load_img_pipeline = config_mod.train_pipeline.pop(0)
to_float32 = load_img_pipeline.get('to_float32', False)
- config_mod.train_pipeline.pop(0)
- config_mod.test_pipeline.pop(0)
+ del config_mod.train_pipeline[0]
+ del config_mod.test_pipeline[0]
# remove loading annotation in test pipeline
- config_mod.test_pipeline.pop(1)
+ del config_mod.test_pipeline[-2]
train_pipeline = Compose(config_mod.train_pipeline)
test_pipeline = Compose(config_mod.test_pipeline)
@@ -112,7 +111,7 @@ def test_config_data_pipeline():
gt_seg_map=seg)
results['seg_fields'] = ['gt_seg_map']
- print('Test training data pipeline: \n{!r}'.format(train_pipeline))
+ print(f'Test training data pipeline: \n{train_pipeline!r}')
output_results = train_pipeline(results)
assert output_results is not None
@@ -121,9 +120,8 @@ def test_config_data_pipeline():
ori_filename='test_img.png',
img=img,
img_shape=img.shape,
- ori_shape=img.shape,
- )
- print('Test testing data pipeline: \n{!r}'.format(test_pipeline))
+ ori_shape=img.shape)
+ print(f'Test testing data pipeline: \n{test_pipeline!r}')
output_results = test_pipeline(results)
assert output_results is not None
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index 302b5aac9..b90fc8173 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -7,8 +7,9 @@ from unittest.mock import MagicMock
import pytest
from mmseg.datasets import (ADE20KDataset, BaseSegDataset, CityscapesDataset,
- COCOStuffDataset, ISPRSDataset, LoveDADataset,
- PascalVOCDataset, PotsdamDataset, iSAIDDataset)
+ COCOStuffDataset, DecathlonDataset, ISPRSDataset,
+ LIPDataset, LoveDADataset, PascalVOCDataset,
+ PotsdamDataset, iSAIDDataset)
from mmseg.registry import DATASETS
from mmseg.utils import get_classes, get_palette
@@ -242,6 +243,41 @@ def test_isaid():
assert len(test_dataset) == 1
+def test_decathlon():
+ data_root = osp.join(osp.dirname(__file__), '../data')
+ # test loading the training split
+ test_dataset = DecathlonDataset(
+ pipeline=[], data_root=data_root, ann_file='dataset.json')
+ assert len(test_dataset) == 1
+
+ # test loading the test split
+ test_dataset = DecathlonDataset(
+ pipeline=[],
+ data_root=data_root,
+ ann_file='dataset.json',
+ test_mode=True)
+ assert len(test_dataset) == 3
+
+
+def test_lip():
+ data_root = osp.join(osp.dirname(__file__), '../data/pseudo_lip_dataset')
+ # test loading the training split
+ train_dataset = LIPDataset(
+ pipeline=[],
+ data_root=data_root,
+ data_prefix=dict(
+ img_path='train_images', seg_map_path='train_segmentations'))
+ assert len(train_dataset) == 1
+
+ # test loading the validation split
+ test_dataset = LIPDataset(
+ pipeline=[],
+ data_root=data_root,
+ data_prefix=dict(
+ img_path='val_images', seg_map_path='val_segmentations'))
+ assert len(test_dataset) == 1
+
+
@pytest.mark.parametrize('dataset, classes', [
('ADE20KDataset', ('wall', 'building')),
('CityscapesDataset', ('road', 'sidewalk')),
diff --git a/tests/test_datasets/test_dataset_builder.py b/tests/test_datasets/test_dataset_builder.py
index 5e70726aa..099c5b1df 100644
--- a/tests/test_datasets/test_dataset_builder.py
+++ b/tests/test_datasets/test_dataset_builder.py
@@ -11,7 +11,7 @@ register_all_modules()
@DATASETS.register_module()
-class ToyDataset(object):
+class ToyDataset:
def __init__(self, cnt=0):
self.cnt = cnt
diff --git a/tests/test_datasets/test_loading.py b/tests/test_datasets/test_loading.py
index bccba9379..29a594b4a 100644
--- a/tests/test_datasets/test_loading.py
+++ b/tests/test_datasets/test_loading.py
@@ -7,10 +7,14 @@ import mmcv
import numpy as np
from mmcv.transforms import LoadImageFromFile
-from mmseg.datasets.transforms import LoadAnnotations, LoadImageFromNDArray
+from mmseg.datasets.transforms import (LoadAnnotations,
+ LoadBiomedicalAnnotation,
+ LoadBiomedicalData,
+ LoadBiomedicalImageFromFile,
+ LoadImageFromNDArray)
-class TestLoading(object):
+class TestLoading:
@classmethod
def setup_class(cls):
@@ -185,3 +189,53 @@ class TestLoading(object):
"color_type='color', "
"imdecode_backend='cv2', "
"file_client_args={'backend': 'disk'})")
+
+ def test_load_biomedical_img(self):
+ results = dict(
+ img_path=osp.join(self.data_prefix, 'biomedical.nii.gz'))
+ transform = LoadBiomedicalImageFromFile()
+ results = transform(copy.deepcopy(results))
+ assert results['img_path'] == osp.join(self.data_prefix,
+ 'biomedical.nii.gz')
+ assert len(results['img'].shape) == 4
+ assert results['img'].dtype == np.float32
+ assert results['ori_shape'] == results['img'].shape[1:]
+ assert repr(transform) == ('LoadBiomedicalImageFromFile('
+ "decode_backend='nifti', "
+ 'to_xyz=False, '
+ 'to_float32=True, '
+ "file_client_args={'backend': 'disk'})")
+
+ def test_load_biomedical_annotation(self):
+ results = dict(
+ seg_map_path=osp.join(self.data_prefix, 'biomedical_ann.nii.gz'))
+ transform = LoadBiomedicalAnnotation()
+ results = transform(copy.deepcopy(results))
+ assert len(results['gt_seg_map'].shape) == 3
+ assert results['gt_seg_map'].dtype == np.float32
+
+ def test_load_biomedical_data(self):
+ input_results = dict(
+ img_path=osp.join(self.data_prefix, 'biomedical.npy'))
+ transform = LoadBiomedicalData(with_seg=True)
+ results = transform(copy.deepcopy(input_results))
+ assert results['img_path'] == osp.join(self.data_prefix,
+ 'biomedical.npy')
+ assert results['img'][0].shape == results['gt_seg_map'].shape
+ assert results['img'].dtype == np.float32
+ assert results['ori_shape'] == results['img'].shape[1:]
+ assert repr(transform) == ('LoadBiomedicalData('
+ 'with_seg=True, '
+ "decode_backend='numpy', "
+ 'to_xyz=False, '
+ "file_client_args={'backend': 'disk'})")
+
+ transform = LoadBiomedicalData(with_seg=False)
+ results = transform(copy.deepcopy(input_results))
+ assert len(results['img'].shape) == 4
+ assert results.get('gt_seg_map') is None
+ assert repr(transform) == ('LoadBiomedicalData('
+ 'with_seg=False, '
+ "decode_backend='numpy', "
+ 'to_xyz=False, '
+ "file_client_args={'backend': 'disk'})")
diff --git a/tests/test_datasets/test_transform.py b/tests/test_datasets/test_transform.py
index bf4accf67..bd9c05ac4 100644
--- a/tests/test_datasets/test_transform.py
+++ b/tests/test_datasets/test_transform.py
@@ -678,4 +678,31 @@ def test_resize_to_multiple():
results = transform(results)
assert results['img'].shape == (224, 256, 3)
assert results['gt_semantic_seg'].shape == (224, 256)
- assert results['img_shape'] == (224, 256, 3)
+ assert results['img_shape'] == (224, 256)
+
+
+def test_generate_edge():
+ transform = dict(type='GenerateEdge', edge_width=1)
+ transform = TRANSFORMS.build(transform)
+
+ seg_map = np.array([
+ [1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 2],
+ [1, 1, 1, 2, 2],
+ [1, 1, 2, 2, 2],
+ [1, 2, 2, 2, 2],
+ [2, 2, 2, 2, 2],
+ ])
+ results = dict()
+ results['gt_seg_map'] = seg_map
+ results['img_shape'] = seg_map.shape
+
+ results = transform(results)
+ assert np.all(results['gt_edge'] == np.array([
+ [0, 0, 0, 1, 0],
+ [0, 0, 1, 1, 1],
+ [0, 1, 1, 1, 0],
+ [1, 1, 1, 0, 0],
+ [1, 1, 0, 0, 0],
+ [1, 0, 0, 0, 0],
+ ]))
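The expected matrix encodes a one-pixel boundary in which each label change is attributed to a single pixel of the differing pair. The sketch below reproduces this fixture under that convention; it is a re-derivation from the test data, not GenerateEdge's actual code. Anti-diagonal changes, which this fixture never exercises, are omitted, and edge_width > 1 would additionally dilate the result:

import numpy as np


def generate_edge_width1(seg_map):
    """Hypothetical edge map: bottom pixel for vertical changes, left pixel
    for horizontal changes, top-left pixel for diagonal changes."""
    h, w = seg_map.shape
    edge = np.zeros((h, w), dtype=np.uint8)
    edge[1:, :][seg_map[1:, :] != seg_map[:-1, :]] = 1        # vertical
    edge[:, :-1][seg_map[:, :-1] != seg_map[:, 1:]] = 1       # horizontal
    edge[:-1, :-1][seg_map[:-1, :-1] != seg_map[1:, 1:]] = 1  # diagonal
    return edge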
diff --git a/tests/test_evaluation/test_metrics/test_citys_metric.py b/tests/test_evaluation/test_metrics/test_citys_metric.py
index 34c0c9a5e..a6d6db5ca 100644
--- a/tests/test_evaluation/test_metrics/test_citys_metric.py
+++ b/tests/test_evaluation/test_metrics/test_citys_metric.py
@@ -46,6 +46,8 @@ class TestCitysMetric(TestCase):
'tests/data/pseudo_cityscapes_dataset/gtFine/val/\
frankfurt/frankfurt_000000_000294_gtFine_labelTrainIds.png'
+ mm_inputs['seg_map_path'] = mm_inputs['data_sample'][
+ 'seg_map_path']
packed_inputs.append(mm_inputs)
return packed_inputs
@@ -96,16 +98,20 @@ class TestCitysMetric(TestCase):
def test_evaluate(self):
"""Test using the metric in the same way as Evalutor."""
- data_batch = self._demo_mm_inputs()
- predictions = self._demo_mm_model_output()
+ data_batch = self._demo_mm_inputs(2)
+ predictions = self._demo_mm_model_output(2)
+ data_samples = [
+ dict(**data, **result)
+ for data, result in zip(data_batch, predictions)
+ ]
iou_metric = CitysMetric(citys_metrics=['cityscapes'])
- iou_metric.process(data_batch, predictions)
+ iou_metric.process(data_batch, data_samples)
res = iou_metric.evaluate(6)
self.assertIsInstance(res, dict)
# test to_label_id = True
iou_metric = CitysMetric(
citys_metrics=['cityscapes'], to_label_id=True)
- iou_metric.process(data_batch, predictions)
+ iou_metric.process(data_batch, data_samples)
res = iou_metric.evaluate(6)
self.assertIsInstance(res, dict)
import shutil
diff --git a/tests/test_models/test_backbones/test_resnet.py b/tests/test_models/test_backbones/test_resnet.py
index d0e287422..f2f24ba56 100644
--- a/tests/test_models/test_backbones/test_resnet.py
+++ b/tests/test_models/test_backbones/test_resnet.py
@@ -331,7 +331,7 @@ def test_resnet_backbone():
for param in layer.parameters():
assert param.requires_grad is False
for i in range(1, frozen_stages + 1):
- layer = getattr(model, 'layer{}'.format(i))
+ layer = getattr(model, f'layer{i}')
for mod in layer.modules():
if isinstance(mod, _BatchNorm):
assert mod.training is False
@@ -347,7 +347,7 @@ def test_resnet_backbone():
for param in model.stem.parameters():
assert param.requires_grad is False
for i in range(1, frozen_stages + 1):
- layer = getattr(model, 'layer{}'.format(i))
+ layer = getattr(model, f'layer{i}')
for mod in layer.modules():
if isinstance(mod, _BatchNorm):
assert mod.training is False
diff --git a/tests/test_models/test_heads/test_decode_head.py b/tests/test_models/test_heads/test_decode_head.py
new file mode 100644
index 000000000..88e6bed10
--- /dev/null
+++ b/tests/test_models/test_heads/test_decode_head.py
@@ -0,0 +1,193 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from unittest.mock import patch
+
+import pytest
+import torch
+from mmengine.structures import PixelData
+
+from mmseg.models.decode_heads.decode_head import BaseDecodeHead
+from mmseg.structures import SegDataSample
+from .utils import to_cuda
+
+
+@patch.multiple(BaseDecodeHead, __abstractmethods__=set())
+def test_decode_head():
+
+ with pytest.raises(AssertionError):
+ # default input_transform doesn't accept multiple inputs
+ BaseDecodeHead([32, 16], 16, num_classes=19)
+
+ with pytest.raises(AssertionError):
+ # default input_transform doesn't accept a list of in_index
+ BaseDecodeHead(32, 16, num_classes=19, in_index=[-1, -2])
+
+ with pytest.raises(AssertionError):
+ # 'concat' is not a supported input_transform mode
+ BaseDecodeHead(32, 16, num_classes=19, input_transform='concat')
+
+ with pytest.raises(AssertionError):
+ # in_channels should be list|tuple
+ BaseDecodeHead(32, 16, num_classes=19, input_transform='resize_concat')
+
+ with pytest.raises(AssertionError):
+ # in_index should be list|tuple
+ BaseDecodeHead([32],
+ 16,
+ in_index=-1,
+ num_classes=19,
+ input_transform='resize_concat')
+
+ with pytest.raises(AssertionError):
+ # len(in_index) should equal len(in_channels)
+ BaseDecodeHead([32, 16],
+ 16,
+ num_classes=19,
+ in_index=[-1],
+ input_transform='resize_concat')
+
+ with pytest.raises(ValueError):
+ # out_channels should be equal to num_classes
+ BaseDecodeHead(32, 16, num_classes=19, out_channels=18)
+
+ # test out_channels
+ head = BaseDecodeHead(32, 16, num_classes=2)
+ assert head.out_channels == 2
+
+ # test out_channels == 1 and num_classes == 2
+ head = BaseDecodeHead(32, 16, num_classes=2, out_channels=1)
+ assert head.out_channels == 1 and head.num_classes == 2
+
+ # test default dropout
+ head = BaseDecodeHead(32, 16, num_classes=19)
+ assert hasattr(head, 'dropout') and head.dropout.p == 0.1
+
+ # test set dropout
+ head = BaseDecodeHead(32, 16, num_classes=19, dropout_ratio=0.2)
+ assert hasattr(head, 'dropout') and head.dropout.p == 0.2
+
+ # test no input_transform
+ inputs = [torch.randn(1, 32, 45, 45)]
+ head = BaseDecodeHead(32, 16, num_classes=19)
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.in_channels == 32
+ assert head.input_transform is None
+ transformed_inputs = head._transform_inputs(inputs)
+ assert transformed_inputs.shape == (1, 32, 45, 45)
+
+ # test input_transform = resize_concat
+ inputs = [torch.randn(1, 32, 45, 45), torch.randn(1, 16, 21, 21)]
+ head = BaseDecodeHead([32, 16],
+ 16,
+ num_classes=19,
+ in_index=[0, 1],
+ input_transform='resize_concat')
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ assert head.in_channels == 48
+ assert head.input_transform == 'resize_concat'
+ transformed_inputs = head._transform_inputs(inputs)
+ assert transformed_inputs.shape == (1, 48, 45, 45)
+
+ # test multi-loss, loss_decode is dict
+ with pytest.raises(TypeError):
+ # loss_decode must be a dict or sequence of dict.
+ BaseDecodeHead(3, 16, num_classes=19, loss_decode=['CrossEntropyLoss'])
+
+ inputs = torch.randn(2, 19, 8, 8).float()
+ data_samples = [
+ SegDataSample(gt_sem_seg=PixelData(data=torch.ones(64, 64).long()))
+ for _ in range(2)
+ ]
+
+ head = BaseDecodeHead(
+ 3,
+ 16,
+ num_classes=19,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ loss = head.loss_by_feat(
+ seg_logits=inputs, batch_data_samples=data_samples)
+ assert 'loss_ce' in loss
+
+ # test multi-loss, loss_decode is list of dict
+ inputs = torch.randn(2, 19, 8, 8).float()
+ data_samples = [
+ SegDataSample(gt_sem_seg=PixelData(data=torch.ones(64, 64).long()))
+ for _ in range(2)
+ ]
+ head = BaseDecodeHead(
+ 3,
+ 16,
+ num_classes=19,
+ loss_decode=[
+ dict(type='CrossEntropyLoss', loss_name='loss_1'),
+ dict(type='CrossEntropyLoss', loss_name='loss_2')
+ ])
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+
+ loss = head.loss_by_feat(
+ seg_logits=inputs, batch_data_samples=data_samples)
+ assert 'loss_1' in loss
+ assert 'loss_2' in loss
+
+ # 'loss_decode' must be a dict or sequence of dict
+ with pytest.raises(TypeError):
+ BaseDecodeHead(3, 16, num_classes=19, loss_decode=['CrossEntropyLoss'])
+ with pytest.raises(TypeError):
+ BaseDecodeHead(3, 16, num_classes=19, loss_decode=0)
+
+ # test multi-loss, loss_decode is list of dict
+ inputs = torch.randn(2, 19, 8, 8).float()
+ data_samples = [
+ SegDataSample(gt_sem_seg=PixelData(data=torch.ones(64, 64).long()))
+ for _ in range(2)
+ ]
+ head = BaseDecodeHead(
+ 3,
+ 16,
+ num_classes=19,
+ loss_decode=(dict(type='CrossEntropyLoss', loss_name='loss_1'),
+ dict(type='CrossEntropyLoss', loss_name='loss_2'),
+ dict(type='CrossEntropyLoss', loss_name='loss_3')))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ loss = head.loss_by_feat(
+ seg_logits=inputs, batch_data_samples=data_samples)
+ assert 'loss_1' in loss
+ assert 'loss_2' in loss
+ assert 'loss_3' in loss
+
+ # test multi-loss, loss_decode is a list of dicts whose loss names are identical
+ inputs = torch.randn(2, 19, 8, 8).float()
+ data_samples = [
+ SegDataSample(gt_sem_seg=PixelData(data=torch.ones(64, 64).long()))
+ for _ in range(2)
+ ]
+ head = BaseDecodeHead(
+ 3,
+ 16,
+ num_classes=19,
+ loss_decode=(dict(type='CrossEntropyLoss', loss_name='loss_ce'),
+ dict(type='CrossEntropyLoss', loss_name='loss_ce'),
+ dict(type='CrossEntropyLoss', loss_name='loss_ce')))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ loss_3 = head.loss_by_feat(
+ seg_logits=inputs, batch_data_samples=data_samples)
+
+ head = BaseDecodeHead(
+ 3,
+ 16,
+ num_classes=19,
+ loss_decode=(dict(type='CrossEntropyLoss', loss_name='loss_ce')))
+ if torch.cuda.is_available():
+ head, inputs = to_cuda(head, inputs)
+ loss = head.loss_by_feat(
+ seg_logits=inputs, batch_data_samples=data_samples)
+ assert 'loss_ce' in loss
+ assert 'loss_ce' in loss_3
+ assert loss_3['loss_ce'] == 3 * loss['loss_ce']
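The final assertions rely on losses that share a `loss_name` being summed into a single entry, so three identical CrossEntropy terms come out at exactly three times the single-term value. A hedged sketch of that accumulation rule (it mirrors, but is not, BaseDecodeHead.loss_by_feat):

def accumulate_losses(loss_decodes, seg_logits, seg_label):
    """loss_decodes: iterable of loss callables, each with a .loss_name."""
    loss = dict()
    for loss_decode in loss_decodes:
        value = loss_decode(seg_logits, seg_label)
        if loss_decode.loss_name not in loss:
            loss[loss_decode.loss_name] = value
        else:
            # same name: accumulate into the existing entry
            loss[loss_decode.loss_name] += value
    return loss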
diff --git a/tests/test_models/test_segmentors/__init__.py b/tests/test_models/test_segmentors/__init__.py
new file mode 100644
index 000000000..ef101fec6
--- /dev/null
+++ b/tests/test_models/test_segmentors/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) OpenMMLab. All rights reserved.
diff --git a/tests/test_models/test_segmentors/test_cascade_encoder_decoder.py b/tests/test_models/test_segmentors/test_cascade_encoder_decoder.py
new file mode 100644
index 000000000..941816d25
--- /dev/null
+++ b/tests/test_models/test_segmentors/test_cascade_encoder_decoder.py
@@ -0,0 +1,57 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine import ConfigDict
+
+from mmseg.models import build_segmentor
+from .utils import _segmentor_forward_train_test
+
+
+def test_cascade_encoder_decoder():
+
+ # test 1 decode head, w/o aux head
+ cfg = ConfigDict(
+ type='CascadeEncoderDecoder',
+ num_stages=2,
+ backbone=dict(type='ExampleBackbone'),
+ decode_head=[
+ dict(type='ExampleDecodeHead'),
+ dict(type='ExampleCascadeDecodeHead')
+ ])
+ cfg.test_cfg = ConfigDict(mode='whole')
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
+
+ # test slide mode
+ cfg.test_cfg = ConfigDict(mode='slide', crop_size=(3, 3), stride=(2, 2))
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
+
+ # test 1 decode head, 1 aux head
+ cfg = ConfigDict(
+ type='CascadeEncoderDecoder',
+ num_stages=2,
+ backbone=dict(type='ExampleBackbone'),
+ decode_head=[
+ dict(type='ExampleDecodeHead'),
+ dict(type='ExampleCascadeDecodeHead')
+ ],
+ auxiliary_head=dict(type='ExampleDecodeHead'))
+ cfg.test_cfg = ConfigDict(mode='whole')
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
+
+ # test 1 decode head, 2 aux head
+ cfg = ConfigDict(
+ type='CascadeEncoderDecoder',
+ num_stages=2,
+ backbone=dict(type='ExampleBackbone'),
+ decode_head=[
+ dict(type='ExampleDecodeHead'),
+ dict(type='ExampleCascadeDecodeHead')
+ ],
+ auxiliary_head=[
+ dict(type='ExampleDecodeHead'),
+ dict(type='ExampleDecodeHead')
+ ])
+ cfg.test_cfg = ConfigDict(mode='whole')
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
diff --git a/tests/test_models/test_segmentors/test_encoder_decoder.py b/tests/test_models/test_segmentors/test_encoder_decoder.py
new file mode 100644
index 000000000..81f89db41
--- /dev/null
+++ b/tests/test_models/test_segmentors/test_encoder_decoder.py
@@ -0,0 +1,59 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+from mmengine import ConfigDict
+
+from mmseg.models import build_segmentor
+from .utils import _segmentor_forward_train_test
+
+
+def test_encoder_decoder():
+
+ # test 1 decode head, w/o aux head
+
+ cfg = ConfigDict(
+ type='EncoderDecoder',
+ backbone=dict(type='ExampleBackbone'),
+ decode_head=dict(type='ExampleDecodeHead'),
+ train_cfg=None,
+ test_cfg=dict(mode='whole'))
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
+
+ # test out_channels == 1
+ cfg = ConfigDict(
+ type='EncoderDecoder',
+ backbone=dict(type='ExampleBackbone'),
+ decode_head=dict(
+ type='ExampleDecodeHead', num_classes=2, out_channels=1),
+ train_cfg=None,
+ test_cfg=dict(mode='whole'))
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
+
+ # test slide mode
+ cfg.test_cfg = ConfigDict(mode='slide', crop_size=(3, 3), stride=(2, 2))
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
+
+ # test 1 decode head, 1 aux head
+ cfg = ConfigDict(
+ type='EncoderDecoder',
+ backbone=dict(type='ExampleBackbone'),
+ decode_head=dict(type='ExampleDecodeHead'),
+ auxiliary_head=dict(type='ExampleDecodeHead'))
+ cfg.test_cfg = ConfigDict(mode='whole')
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
+
+ # test 1 decode head, 2 aux head
+ cfg = ConfigDict(
+ type='EncoderDecoder',
+ backbone=dict(type='ExampleBackbone'),
+ decode_head=dict(type='ExampleDecodeHead'),
+ auxiliary_head=[
+ dict(type='ExampleDecodeHead'),
+ dict(type='ExampleDecodeHead')
+ ])
+ cfg.test_cfg = ConfigDict(mode='whole')
+ segmentor = build_segmentor(cfg)
+ _segmentor_forward_train_test(segmentor)
diff --git a/tests/test_models/test_segmentors/utils.py b/tests/test_models/test_segmentors/utils.py
new file mode 100644
index 000000000..9b155c096
--- /dev/null
+++ b/tests/test_models/test_segmentors/utils.py
@@ -0,0 +1,134 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from mmengine.optim import OptimWrapper
+from mmengine.structures import PixelData
+from torch import nn
+from torch.optim import SGD
+
+from mmseg.models import SegDataPreProcessor
+from mmseg.models.decode_heads.cascade_decode_head import BaseCascadeDecodeHead
+from mmseg.models.decode_heads.decode_head import BaseDecodeHead
+from mmseg.registry import MODELS
+from mmseg.structures import SegDataSample
+
+
+def _demo_mm_inputs(input_shape=(1, 3, 8, 16), num_classes=10):
+ """Create a superset of inputs needed to run test or train batches.
+
+ Args:
+ input_shape (tuple): Input batch dimensions.
+ num_classes (int): Number of semantic classes.
+ """
+ (N, C, H, W) = input_shape
+
+ imgs = torch.randn(*input_shape)
+ segs = torch.randint(
+ low=0, high=num_classes - 1, size=(N, H, W), dtype=torch.long)
+
+ img_metas = [{
+ 'img_shape': (H, W),
+ 'ori_shape': (H, W),
+ 'pad_shape': (H, W, C),
+ 'filename': '.png',
+ 'scale_factor': 1.0,
+ 'flip': False,
+ 'flip_direction': 'horizontal'
+ } for _ in range(N)]
+
+ data_samples = [
+ SegDataSample(
+ gt_sem_seg=PixelData(data=segs[i]), metainfo=img_metas[i])
+ for i in range(N)
+ ]
+
+ mm_inputs = {'imgs': torch.FloatTensor(imgs), 'data_samples': data_samples}
+
+ return mm_inputs
+
+
+@MODELS.register_module()
+class ExampleBackbone(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ self.conv = nn.Conv2d(3, 3, 3)
+
+ def init_weights(self, pretrained=None):
+ pass
+
+ def forward(self, x):
+ return [self.conv(x)]
+
+
+@MODELS.register_module()
+class ExampleDecodeHead(BaseDecodeHead):
+
+ def __init__(self, num_classes=19, out_channels=None):
+ super().__init__(
+ 3, 3, num_classes=num_classes, out_channels=out_channels)
+
+ def forward(self, inputs):
+ return self.cls_seg(inputs[0])
+
+
+@MODELS.register_module()
+class ExampleCascadeDecodeHead(BaseCascadeDecodeHead):
+
+ def __init__(self):
+ super().__init__(3, 3, num_classes=19)
+
+ def forward(self, inputs, prev_out):
+ return self.cls_seg(inputs[0])
+
+
+def _segmentor_forward_train_test(segmentor):
+ if isinstance(segmentor.decode_head, nn.ModuleList):
+ num_classes = segmentor.decode_head[-1].num_classes
+ else:
+ num_classes = segmentor.decode_head.num_classes
+ # the default input_shape in _demo_mm_inputs gives batch_size=1
+ mm_inputs = _demo_mm_inputs(num_classes=num_classes)
+
+ # convert to cuda Tensor if applicable
+ if torch.cuda.is_available():
+ segmentor = segmentor.cuda()
+
+ # check data preprocessor
+ if not hasattr(segmentor,
+ 'data_preprocessor') or segmentor.data_preprocessor is None:
+ segmentor.data_preprocessor = SegDataPreProcessor()
+
+ mm_inputs = segmentor.data_preprocessor(mm_inputs, True)
+ imgs = mm_inputs.pop('imgs')
+ data_samples = mm_inputs.pop('data_samples')
+
+ # create optimizer wrapper
+ optimizer = SGD(segmentor.parameters(), lr=0.1)
+ optim_wrapper = OptimWrapper(optimizer)
+
+ # Test forward train
+ losses = segmentor.forward(imgs, data_samples, mode='loss')
+ assert isinstance(losses, dict)
+
+ # Test train_step
+ data_batch = dict(inputs=imgs, data_samples=data_samples)
+ outputs = segmentor.train_step(data_batch, optim_wrapper)
+ assert isinstance(outputs, dict)
+ assert 'loss' in outputs
+
+ # Test val_step
+ with torch.no_grad():
+ segmentor.eval()
+ data_batch = dict(inputs=imgs, data_samples=data_samples)
+ outputs = segmentor.val_step(data_batch)
+ assert isinstance(outputs, list)
+
+ # Test forward simple test
+ with torch.no_grad():
+ segmentor.eval()
+ data_batch = dict(inputs=imgs, data_samples=data_samples)
+ results = segmentor.forward(imgs, data_samples, mode='tensor')
+ assert isinstance(results, torch.Tensor)
diff --git a/tests/test_utils/test_io.py b/tests/test_utils/test_io.py
new file mode 100644
index 000000000..05abd275f
--- /dev/null
+++ b/tests/test_utils/test_io.py
@@ -0,0 +1,33 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+
+import numpy as np
+import pytest
+from mmengine import FileClient
+
+from mmseg.utils import datafrombytes
+
+
+@pytest.mark.parametrize(
+ ['backend', 'suffix'],
+ [['nifti', '.nii.gz'], ['numpy', '.npy'], ['pickle', '.pkl']])
+def test_datafrombytes(backend, suffix):
+
+ file_client = FileClient('disk')
+ file_path = osp.join(osp.dirname(__file__), '../data/biomedical' + suffix)
+ raw_bytes = file_client.get(file_path)  # avoid shadowing the builtin `bytes`
+ data = datafrombytes(raw_bytes, backend)
+
+ if backend == 'pickle':
+ # test pickle loading
+ assert isinstance(data, dict)
+ else:
+ assert isinstance(data, np.ndarray)
+ if backend == 'nifti':
+ # test nifti file loading
+ assert len(data.shape) == 3
+ else:
+ # test npy file loading
+ # testing data biomedical.npy includes data and label
+ assert len(data.shape) == 4
+ assert data.shape[0] == 2
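A hedged sketch of a bytes-to-data decoder covering the three backends the test parametrizes; it approximates, but is not, mmseg.utils.datafrombytes, and assumes nibabel >= 3.2 for Nifti1Image.from_bytes:

import gzip
import io
import pickle

import numpy as np


def decode_bytes(content, backend='numpy'):
    if backend == 'pickle':
        return pickle.loads(content)           # the .pkl fixture is a dict
    if backend == 'nifti':
        from nibabel import Nifti1Image        # assumption: nibabel >= 3.2
        with gzip.open(io.BytesIO(content)) as f:  # fixture is .nii.gz
            return np.asanyarray(Nifti1Image.from_bytes(f.read()).dataobj)
    return np.load(io.BytesIO(content))        # 'numpy': the 4D .npy fixture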
diff --git a/tests/test_visualization/test_local_visualizer.py b/tests/test_visualization/test_local_visualizer.py
index 7754c30ed..b60a9b875 100644
--- a/tests/test_visualization/test_local_visualizer.py
+++ b/tests/test_visualization/test_local_visualizer.py
@@ -118,19 +118,14 @@ class TestSegLocalVisualizer(TestCase):
[255, 0, 0], [0, 0, 142], [0, 0, 70],
[0, 60, 100], [0, 80, 100], [0, 0, 230],
[119, 11, 32]])
- seg_local_visualizer.add_datasample(out_file, image,
- data_sample)
-
# test out_file
- seg_local_visualizer.add_datasample(out_file, image,
- data_sample)
- assert os.path.exists(
- osp.join(tmp_dir, 'vis_data', 'vis_image',
- out_file + '_0.png'))
- drawn_img = cv2.imread(
- osp.join(tmp_dir, 'vis_data', 'vis_image',
- out_file + '_0.png'))
- assert drawn_img.shape == (h, w, 3)
+ seg_local_visualizer.add_datasample(
+ out_file,
+ image,
+ data_sample,
+ out_file=osp.join(tmp_dir, 'test.png'))
+ self._assert_image_and_shape(
+ osp.join(tmp_dir, 'test.png'), (h, w, 3))
# test gt_instances and pred_instances
pred_sem_seg_data = dict(
@@ -139,12 +134,13 @@ class TestSegLocalVisualizer(TestCase):
data_sample.pred_sem_seg = pred_sem_seg
+ # test draw prediction with gt
seg_local_visualizer.add_datasample(out_file, image,
data_sample)
self._assert_image_and_shape(
osp.join(tmp_dir, 'vis_data', 'vis_image',
out_file + '_0.png'), (h, w * 2, 3))
-
+ # test draw prediction without gt
seg_local_visualizer.add_datasample(
out_file, image, data_sample, draw_gt=False)
self._assert_image_and_shape(
diff --git a/tools/analysis_tools/analyze_logs.py b/tools/analysis_tools/analyze_logs.py
index 0539a3c59..7464d2316 100644
--- a/tools/analysis_tools/analyze_logs.py
+++ b/tools/analysis_tools/analyze_logs.py
@@ -101,7 +101,7 @@ def load_json_logs(json_logs):
log_dicts = [dict() for _ in json_logs]
prev_step = 0
for json_log, log_dict in zip(json_logs, log_dicts):
- with open(json_log, 'r') as log_file:
+ with open(json_log) as log_file:
for line in log_file:
log = json.loads(line.strip())
# the final step in json file is 0.
diff --git a/tools/dataset_converters/chase_db1.py b/tools/dataset_converters/chase_db1.py
index 82e687ce1..f4fefbd77 100644
--- a/tools/dataset_converters/chase_db1.py
+++ b/tools/dataset_converters/chase_db1.py
@@ -47,7 +47,7 @@ def main():
print('Generating training dataset...')
assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \
- 'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN)
+ f'len(os.listdir(tmp_dir)) != {CHASE_DB1_LEN}'
for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, img_name))
diff --git a/tools/dataset_converters/hrf.py b/tools/dataset_converters/hrf.py
index 2dd8c2296..3bfd80c9e 100644
--- a/tools/dataset_converters/hrf.py
+++ b/tools/dataset_converters/hrf.py
@@ -63,7 +63,7 @@ def main():
zip_file.extractall(tmp_dir)
assert len(os.listdir(tmp_dir)) == HRF_LEN, \
- 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN)
+ f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, filename))
@@ -85,7 +85,7 @@ def main():
zip_file.extractall(tmp_dir)
assert len(os.listdir(tmp_dir)) == HRF_LEN, \
- 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN)
+ f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(tmp_dir, filename))
diff --git a/tools/dataset_converters/isaid.py b/tools/dataset_converters/isaid.py
index 738e2a368..1da264d97 100644
--- a/tools/dataset_converters/isaid.py
+++ b/tools/dataset_converters/isaid.py
@@ -188,17 +188,17 @@ def main():
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'test'))
assert os.path.exists(os.path.join(dataset_path, 'train')), \
- 'train is not in {}'.format(dataset_path)
+ f'train is not in {dataset_path}'
assert os.path.exists(os.path.join(dataset_path, 'val')), \
- 'val is not in {}'.format(dataset_path)
+ f'val is not in {dataset_path}'
assert os.path.exists(os.path.join(dataset_path, 'test')), \
- 'test is not in {}'.format(dataset_path)
+ f'test is not in {dataset_path}'
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for dataset_mode in ['train', 'val', 'test']:
# for dataset_mode in [ 'test']:
- print('Extracting {}ing.zip...'.format(dataset_mode))
+ print(f'Extracting {dataset_mode}ing.zip...')
img_zipp_list = glob.glob(
os.path.join(dataset_path, dataset_mode, 'images', '*.zip'))
print('Find the data', img_zipp_list)
diff --git a/tools/dataset_converters/loveda.py b/tools/dataset_converters/loveda.py
index b54ff0b14..5b0ef4bb8 100644
--- a/tools/dataset_converters/loveda.py
+++ b/tools/dataset_converters/loveda.py
@@ -38,11 +38,11 @@ def main():
mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
assert 'Train.zip' in os.listdir(dataset_path), \
- 'Train.zip is not in {}'.format(dataset_path)
+ f'Train.zip is not in {dataset_path}'
assert 'Val.zip' in os.listdir(dataset_path), \
- 'Val.zip is not in {}'.format(dataset_path)
+ f'Val.zip is not in {dataset_path}'
assert 'Test.zip' in os.listdir(dataset_path), \
- 'Test.zip is not in {}'.format(dataset_path)
+ f'Test.zip is not in {dataset_path}'
with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
for dataset in ['Train', 'Val', 'Test']:
diff --git a/tools/dataset_converters/stare.py b/tools/dataset_converters/stare.py
index e5e180f40..4a23ba4dd 100644
--- a/tools/dataset_converters/stare.py
+++ b/tools/dataset_converters/stare.py
@@ -68,7 +68,7 @@ def main():
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
- 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN)
+ f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
@@ -103,7 +103,7 @@ def main():
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
- 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN)
+ f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
@@ -142,7 +142,7 @@ def main():
now_dir = osp.join(tmp_dir, 'files')
assert len(os.listdir(now_dir)) == STARE_LEN, \
- 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN)
+ f'len(os.listdir(now_dir)) != {STARE_LEN}'
for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
img = mmcv.imread(osp.join(now_dir, filename))
diff --git a/tools/deployment/pytorch2torchscript.py b/tools/deployment/pytorch2torchscript.py
index 05022326a..e69e705bb 100644
--- a/tools/deployment/pytorch2torchscript.py
+++ b/tools/deployment/pytorch2torchscript.py
@@ -126,7 +126,7 @@ def pytorch2libtorch(model,
print(traced_model.graph)
traced_model.save(output_file)
- print('Successfully exported TorchScript model: {}'.format(output_file))
+ print(f'Successfully exported TorchScript model: {output_file}')
def parse_args():
diff --git a/tools/misc/publish_model.py b/tools/misc/publish_model.py
index e2660578a..c1bbc9ac1 100644
--- a/tools/misc/publish_model.py
+++ b/tools/misc/publish_model.py
@@ -23,7 +23,7 @@ def process_checkpoint(in_file, out_file):
# add the code here.
torch.save(checkpoint, out_file)
sha = subprocess.check_output(['sha256sum', out_file]).decode()
- final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])
+ final_file = out_file[:-len('.pth')] + f'-{sha[:8]}.pth'
subprocess.Popen(['mv', out_file, final_file])
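The rename line above now slices the extension off rather than calling str.rstrip, because rstrip removes a *set of characters*, not a suffix. A small illustration of the pitfall:

name = 'checkpoint.pth'
# rstrip strips trailing characters drawn from {'.', 'p', 't', 'h'},
# so it also eats the final 't' of 'checkpoint':
assert name.rstrip('.pth') == 'checkpoin'
# suffix-safe spellings:
assert name[:-len('.pth')] == 'checkpoint'
assert name.removesuffix('.pth') == 'checkpoint'  # Python >= 3.9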
diff --git a/tools/train.py b/tools/train.py
index 8c1e5c7da..172815a9f 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -18,12 +18,9 @@ def parse_args():
parser.add_argument('--work-dir', help='the dir to save logs and models')
parser.add_argument(
'--resume',
- nargs='?',
- type=str,
- const='auto',
- help='If specify checkpoint path, resume from it, while if not '
- 'specify, try to auto resume from the latest checkpoint '
- 'in the work directory.')
+ action='store_true',
+ default=False,
+ help='resume from the latest checkpoint in the work_dir automatically')
parser.add_argument(
'--amp',
action='store_true',
@@ -90,12 +87,7 @@ def main():
cfg.optim_wrapper.loss_scale = 'dynamic'
# resume training
- if args.resume == 'auto':
- cfg.resume = True
- cfg.load_from = None
- elif args.resume is not None:
- cfg.resume = True
- cfg.load_from = args.resume
+ cfg.resume = args.resume
# build the runner from config
if 'runner_type' not in cfg: