Mirror of https://github.com/open-mmlab/mmsegmentation.git (synced 2025-06-03 22:03:48 +08:00)
Merge pull request #2253 from open-mmlab/dev-1.x

Merge MMSegmentation 1.x development branch dev-1.x to main branch 1.x

Commit 0c87f7a0c9
@@ -49,32 +49,25 @@ jobs:
      - run:
          name: Configure Python & pip
          command: |
-           python -m pip install --upgrade pip
-           python -m pip install wheel
+           pip install --upgrade pip
+           pip install wheel
      - run:
          name: Install PyTorch
          command: |
            python -V
-           python -m pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
+           pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
      - run:
          name: Install mmseg dependencies
          command: |
-           python -m pip install git+https://github.com/open-mmlab/mmengine.git@main
-           python -m pip install -U openmim 'importlib-metadata<2'
-           python -m mim install 'mmcv>=2.0.0rc1'
-           python -m pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
-           python -m pip install -r requirements/tests.txt -r requirements/optional.txt
+           pip install git+https://github.com/open-mmlab/mmengine.git@main
+           pip install -U openmim
+           mim install 'mmcv>=2.0.0rc1'
+           pip install git+https://github.com/open-mmlab/mmclassification@dev-1.x
+           pip install -r requirements/tests.txt -r requirements/optional.txt
      - run:
          name: Build and install
          command: |
-           python -m pip install -e .
-     - run:
-         name: Run unittests
-         command: |
-           python -m pip install timm
-           python -m coverage run --branch --source mmseg -m pytest tests/
-           python -m coverage xml
-           python -m coverage report -m
+           pip install -e .
+     - run:
+         name: Skip timm unittests and generate coverage report
+         command: |
@@ -101,8 +94,8 @@ jobs:
          # Cloning repos in VM since Docker doesn't have access to the private key
          name: Clone Repos
          command: |
-           git clone -b main --depth 1 ssh://git@github.com/open-mmlab/mmengine.git /home/circleci/mmengine
-           git clone -b dev-1.x --depth 1 ssh://git@github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification
+           git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine
+           git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification
      - run:
          name: Build Docker image
          command: |
@@ -112,19 +105,18 @@
          name: Install mmseg dependencies
          command: |
            docker exec mmseg pip install -e /mmengine
-           docker exec mmseg pip install -U openmim 'importlib-metadata<2'
+           docker exec mmseg pip install -U openmim
            docker exec mmseg mim install 'mmcv>=2.0.0rc1'
            docker exec mmseg pip install -e /mmclassification
-           docker exec mmseg python -m pip install -r requirements.txt
+           docker exec mmseg pip install -r requirements/tests.txt -r requirements/optional.txt
      - run:
          name: Build and install
          command: |
            docker exec mmseg pip install -e .
      - run:
-         name: Run unittests
+         name: Run unittests but skip timm unittests
          command: |
-           docker exec mmseg python -m pip install timm
-           docker exec mmseg python -m pytest tests/
+           docker exec mmseg pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
workflows:
  pr_stage_lint:
    when: << pipeline.parameters.lint_only >>
@@ -159,8 +151,8 @@ workflows:
      - lint
      - build_cpu:
          name: maximum_version_cpu
-         torch: 1.9.0
-         torchvision: 0.10.0
+         torch: 1.12.1
+         torchvision: 0.13.1
          python: 3.9.0
          requires:
            - minimum_version_cpu
@@ -56,8 +56,7 @@ def main():
    for model_name, yml_path in yml_list:
        # Default yaml loader unsafe.
-       model_infos = yml.load(
-           open(yml_path, 'r'), Loader=yml.CLoader)['Models']
+       model_infos = yml.load(open(yml_path), Loader=yml.CLoader)['Models']
        for model_info in model_infos:
            config_name = model_info['Name']
            checkpoint_url = model_info['Weights']
@@ -35,7 +35,7 @@ def process_checkpoint(in_file, out_file):
    # The hash code calculation and rename command differ on different system
    # platform.
    sha = calculate_file_sha256(out_file)
-   final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])
+   final_file = out_file.rstrip('.pth') + f'-{sha[:8]}.pth'
    os.rename(out_file, final_file)

    # Remove prefix and suffix
@@ -54,7 +54,7 @@ def get_final_iter(config):
def get_final_results(log_json_path, iter_num):
    result_dict = dict()
    last_iter = 0
-   with open(log_json_path, 'r') as f:
+   with open(log_json_path) as f:
        for line in f.readlines():
            log_line = json.loads(line)
            if 'mode' not in log_line.keys():
@@ -125,7 +125,7 @@ def main():
        exp_dir = osp.join(work_dir, config_name)
        # check whether the exps is finished
        final_iter = get_final_iter(used_config)
-       final_model = 'iter_{}.pth'.format(final_iter)
+       final_model = f'iter_{final_iter}.pth'
        model_path = osp.join(exp_dir, final_model)

        # skip if the model is still training
@@ -74,7 +74,7 @@ def main():
    commands.append('\n')
    commands.append('\n')

-   with open(args.txt_path, 'r') as f:
+   with open(args.txt_path) as f:
        model_cfgs = f.readlines()
        for i, cfg in enumerate(model_cfgs):
            create_train_bash_info(commands, cfg, script_name, '$PARTITION',
@@ -86,7 +86,7 @@ def main():
    val_list = []
    last_iter = 0
    for log_name in log_list:
-       with open(os.path.join(preceding_path, log_name), 'r') as f:
+       with open(os.path.join(preceding_path, log_name)) as f:
            # ignore the info line
            f.readline()
            all_lines = f.readlines()
@@ -15,7 +15,7 @@ import sys
from lxml import etree
from mmengine.fileio import dump

-MMSEG_ROOT = osp.dirname(osp.dirname((osp.dirname(__file__))))
+MMSEG_ROOT = osp.dirname(osp.dirname(osp.dirname(__file__)))

COLLECTIONS = [
    'ANN', 'APCNet', 'BiSeNetV1', 'BiSeNetV2', 'CCNet', 'CGNet', 'DANet',
@@ -42,7 +42,7 @@ def dump_yaml_and_check_difference(obj, filename, sort_keys=False):
    str_dump = dump(obj, None, file_format='yaml', sort_keys=sort_keys)
    if osp.isfile(filename):
        file_exists = True
-       with open(filename, 'r', encoding='utf-8') as f:
+       with open(filename, encoding='utf-8') as f:
            str_orig = f.read()
    else:
        file_exists = False
@@ -97,7 +97,7 @@ def parse_md(md_file):
    # should be set with head or neck of this config file.
    is_backbone = None

-   with open(md_file, 'r', encoding='UTF-8') as md:
+   with open(md_file, encoding='UTF-8') as md:
        lines = md.readlines()
        i = 0
        current_dataset = ''
.github/workflows/build.yml (vendored, deleted; 261 lines)
@@ -1,261 +0,0 @@
name: build

on:
  push:
    paths-ignore:
      - 'demo/**'
      - '.dev/**'
      - 'docker/**'
      - 'tools/**'
      - '**.md'

  pull_request:
    paths-ignore:
      - 'demo/**'
      - '.dev/**'
      - 'docker/**'
      - 'tools/**'
      - 'docs/**'
      - '**.md'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_cpu:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        python-version: [3.7]
        torch: [1.6.0, 1.7.0, 1.8.0, 1.9.0]
        include:
          - torch: 1.6.0
            torch_version: torch1.6
            torchvision: 0.7.0
          - torch: 1.7.0
            torch_version: torch1.7
            torchvision: 0.8.1
          - torch: 1.8.0
            torch_version: torch1.8
            torchvision: 0.9.0
          - torch: 1.9.0
            torch_version: torch1.9
            torchvision: 0.10.0
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: pip install pip --upgrade
      - name: Install Pillow
        run: pip install Pillow==6.2.2
        if: ${{matrix.torchvision == '0.4.2'}}
      - name: Install PyTorch
        run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
      - name: Install MMEngine
        run: |
          pip install git+https://github.com/open-mmlab/mmengine.git
          python -c 'from mmengine.utils.dl_utils import collect_env;print(collect_env())'
      - name: Install MMCV
        run: |
          pip install -U openmim
          mim install 'mmcv>=2.0.0rc1'
          python -c 'import mmcv; print(mmcv.__version__)'
      - name: Install unittest dependencies
        run: |
          pip install -r requirements.txt
      - name: Build and install
        run: rm -rf .eggs && pip install -e .
      - name: Run unittests and generate coverage report
        run: |
          pip install timm
          coverage run --branch --source mmseg -m pytest tests/
          coverage xml
          coverage report -m
      - name: Skip timm unittests and generate coverage report
        run: |
          coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
          coverage xml
          coverage report -m

  build_cuda101:
    runs-on: ubuntu-18.04
    container:
      image: pytorch/pytorch:1.6.0-cuda10.1-cudnn7-devel

    strategy:
      matrix:
        python-version: [3.7]
        torch:
          [
            1.6.0+cu101,
            1.7.0+cu101,
            1.8.0+cu101
          ]
        include:
          - torch: 1.6.0+cu101
            torch_version: torch1.6
            torchvision: 0.7.0+cu101
          - torch: 1.7.0+cu101
            torch_version: torch1.7
            torchvision: 0.8.1+cu101
          - torch: 1.8.0+cu101
            torch_version: torch1.8
            torchvision: 0.9.0+cu101

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Fetch GPG keys
        run: |
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
      - name: Install system dependencies
        run: |
          apt-get update && apt-get install -y libgl1-mesa-glx ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 python${{matrix.python-version}}-dev
          apt-get clean
          rm -rf /var/lib/apt/lists/*
      - name: Install Pillow
        run: python -m pip install Pillow==6.2.2
        if: ${{matrix.torchvision < 0.5}}
      - name: Install PyTorch
        run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
      - name: Install mmseg dependencies
        run: |
          python -V
          python -m pip install -U openmim
          python -m pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          python -c 'import mmcv; print(mmcv.__version__)'
          python -m pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: python -m pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: |
          rm -rf .eggs
          python setup.py check -m -s
          TORCH_CUDA_ARCH_LIST=7.0 python -m pip install .
      - name: Run unittests and generate coverage report
        run: |
          python -m pip install timm
          coverage run --branch --source mmseg -m pytest tests/
          coverage xml
          coverage report -m
      - name: Skip timm unittests and generate coverage report
        run: |
          coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
          coverage xml
          coverage report -m

  build_cuda102:
    runs-on: ubuntu-18.04
    container:
      image: pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel

    strategy:
      matrix:
        python-version: [3.6, 3.7, 3.8, 3.9]
        torch: [1.9.0+cu102]
        include:
          - torch: 1.9.0+cu102
            torch_version: torch1.9
            torchvision: 0.10.0+cu102

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Fetch GPG keys
        run: |
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
      - name: Install system dependencies
        run: |
          apt-get update && apt-get install -y libgl1-mesa-glx ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6
          apt-get clean
          rm -rf /var/lib/apt/lists/*
      - name: Install Pillow
        run: python -m pip install Pillow==6.2.2
        if: ${{matrix.torchvision < 0.5}}
      - name: Install PyTorch
        run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
      - name: Install mmseg dependencies
        run: |
          python -V
          python -m pip install -U openmim
          python -m pip install git+https://github.com/open-mmlab/mmengine.git
          LC_ALL=C.UTF-8 LC_ALL=C.UTF-8 mim install 'mmcv>=2.0.0rc1'
          python -c 'import mmcv; print(mmcv.__version__)'
          python -m pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: python -m pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: |
          rm -rf .eggs
          python setup.py check -m -s
          TORCH_CUDA_ARCH_LIST=7.0 python -m pip install .
      - name: Run unittests and generate coverage report
        run: |
          python -m pip install timm
          coverage run --branch --source mmseg -m pytest tests/
          coverage xml
          coverage report -m
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v2
        with:
          files: ./coverage.xml
          flags: unittests
          env_vars: OS,PYTHON
          name: codecov-umbrella
          fail_ci_if_error: false

  test_windows:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [windows-2022]
        python: [3.8]
        platform: [cpu, cu111]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}
      - name: Upgrade pip
        run: pip install pip --upgrade --user
      - name: Install OpenCV
        run: pip install opencv-python>=3
      - name: Install PyTorch
        # As a complement to Linux CI, we test on PyTorch LTS version
        run: pip install torch==1.8.2+${{ matrix.platform }} torchvision==0.9.2+${{ matrix.platform }} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
      - name: Install MMEngine
        run: |
          pip install git+https://github.com/open-mmlab/mmengine.git
      - name: Install MMCV
        run: |
          pip install -U openmim
          mim install 'mmcv>=2.0.0rc1'
      - name: Install MMClassification
        run: |
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: pip install -e .
      - name: Run unittests
        run: |
          python -m pip install timm
          coverage run --branch --source mmseg -m pytest tests/ --ignore tests\test_models\test_forward.py tests\test_models\test_backbones\test_beit.py
      - name: Generate coverage report
        run: |
          coverage xml
          coverage report -m
.github/workflows/lint.yml (vendored)
@@ -28,4 +28,4 @@ jobs:
      - name: Check docstring coverage
        run: |
          python -m pip install interrogate
-         interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --exclude mmseg/ops --ignore-regex "__repr__" --fail-under 75 mmseg
+         interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 75 mmseg
.github/workflows/merge_stage_test.yml (vendored, new file; 203 lines)
@@ -0,0 +1,203 @@
name: merge_stage_test

on:
  push:
    paths-ignore:
      - 'README.md'
      - 'README_zh-CN.md'
      - 'docs/**'
      - 'demo/**'
      - '.dev_scripts/**'
      - '.circleci/**'
    branches:
      - dev-1.x

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_cpu_py:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        python-version: [3.6, 3.8, 3.9]
        torch: [1.8.1]
        include:
          - torch: 1.8.1
            torchvision: 0.9.1
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: pip install pip --upgrade
      - name: Install Pillow
        run: pip install Pillow==6.2.2
        if: ${{matrix.torchvision == '0.4.2'}}
      - name: Install PyTorch
        run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
      - name: Install mmseg dependencies
        run: |
          python -V
          pip install -U openmim
          pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: rm -rf .eggs && pip install -e .
      - name: Run unittests and generate coverage report
        run: |
          pip install timm
          coverage run --branch --source mmseg -m pytest tests/
          coverage xml
          coverage report -m

  build_cpu_pt:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        python-version: [3.7]
        torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0]
        include:
          - torch: 1.6.0
            torchvision: 0.7.0
          - torch: 1.7.1
            torchvision: 0.8.2
          - torch: 1.8.1
            torchvision: 0.9.1
          - torch: 1.9.1
            torchvision: 0.10.1
          - torch: 1.10.1
            torchvision: 0.11.2
          - torch: 1.11.0
            torchvision: 0.12.0
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: pip install pip --upgrade
      - name: Install Pillow
        run: pip install Pillow==6.2.2
      - name: Install PyTorch
        run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
      - name: Install mmseg dependencies
        run: |
          python -V
          pip install -U openmim
          pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: rm -rf .eggs && pip install -e .
      - name: Run unittests and generate coverage report
        # timm from v0.6.11 requires torch>=1.7
        if: ${{matrix.torch >= '1.7.0'}}
        run: |
          pip install timm
          coverage run --branch --source mmseg -m pytest tests/
          coverage xml
          coverage report -m
      - name: Skip timm unittests and generate coverage report
        run: |
          coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
          coverage xml
          coverage report -m
      # Only upload coverage report for python3.7 && pytorch1.8.1 without timm
      - name: Upload coverage to Codecov
        if: ${{matrix.torch == '1.8.1' && matrix.python-version == '3.7'}}
        uses: codecov/codecov-action@v2
        with:
          files: ./coverage.xml
          flags: unittests
          env_vars: OS,PYTHON
          name: codecov-umbrella
          fail_ci_if_error: false

  build_cu102:
    runs-on: ubuntu-18.04
    container:
      image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
    strategy:
      matrix:
        python-version: [3.7]
        include:
          - torch: 1.8.1
            cuda: 10.2
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: pip install pip --upgrade
      - name: Fetch GPG keys
        run: |
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
      - name: Install Python-dev
        run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
        if: ${{matrix.python-version != 3.9}}
      - name: Install system dependencies
        run: |
          apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6
      - name: Install mmseg dependencies
        run: |
          python -V
          pip install -U openmim
          pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: |
          python setup.py check -m -s
          TORCH_CUDA_ARCH_LIST=7.0 pip install -e .

  build_windows:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [windows-2022]
        python: [3.7]
        platform: [cpu, cu111]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: python -m pip install pip --upgrade
      - name: Install OpenCV
        run: pip install opencv-python>=3
      - name: Install PyTorch
        run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
      - name: Install mmseg dependencies
        run: |
          python -V
          pip install -U openmim
          pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: pip install -e .
      - name: Run unittests
        run: |
          pip install timm
          coverage run --branch --source mmseg -m pytest tests/ --ignore tests\test_models\test_forward.py tests\test_models\test_backbones\test_beit.py
      - name: Generate coverage report
        run: |
          coverage xml
          coverage report -m
.github/workflows/pr_stage_test.yml (vendored, new file; 140 lines)
@@ -0,0 +1,140 @@
name: pr_stage_test

on:
  pull_request:
    paths-ignore:
      - 'README.md'
      - 'README_zh-CN.md'
      - 'docs/**'
      - 'demo/**'
      - '.dev_scripts/**'
      - '.circleci/**'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_cpu:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        python-version: [3.7]
        include:
          - torch: 1.8.1
            torchvision: 0.9.1
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: pip install pip --upgrade
      - name: Install Pillow
        run: pip install Pillow==6.2.2
        if: ${{matrix.torchvision == '0.4.2'}}
      - name: Install PyTorch
        run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
      - name: Install other dependencies
        run: |
          pip install -U openmim
          pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: rm -rf .eggs && pip install -e .
      - name: Run unittests and generate coverage report
        run: |
          coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
          coverage xml
          coverage report -m
      # Upload coverage report for python3.7 && pytorch1.8.1 cpu without timm
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v1.0.14
        with:
          file: ./coverage.xml
          flags: unittests
          env_vars: OS,PYTHON
          name: codecov-umbrella
          fail_ci_if_error: false

  build_cu102:
    runs-on: ubuntu-18.04
    container:
      image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
    strategy:
      matrix:
        python-version: [3.7]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: pip install pip --upgrade
      - name: Fetch GPG keys
        run: |
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
          apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
      - name: Install Python-dev
        run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
        if: ${{matrix.python-version != 3.9}}
      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev
      - name: Install mmseg dependencies
        run: |
          python -V
          pip install -U openmim
          pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: |
          python setup.py check -m -s
          TORCH_CUDA_ARCH_LIST=7.0 pip install -e .

  build_windows:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [windows-2022]
        python: [3.7]
        platform: [cpu, cu111]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: python -m pip install pip --upgrade
      - name: Install OpenCV
        run: pip install opencv-python>=3
      - name: Install PyTorch
        run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
      - name: Install mmseg dependencies
        run: |
          python -V
          pip install -U openmim
          pip install git+https://github.com/open-mmlab/mmengine.git
          mim install 'mmcv>=2.0.0rc1'
          pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
      - name: Install unittest dependencies
        run: pip install -r requirements/tests.txt -r requirements/optional.txt
      - name: Build and install
        run: pip install -e .
      - name: Run unittests
        run: |
          pip install timm
          coverage run --branch --source mmseg -m pytest tests/ --ignore tests\test_models\test_forward.py tests\test_models\test_backbones\test_beit.py
      - name: Generate coverage report
        run: |
          coverage xml
          coverage report -m
.pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
  - repo: https://gitlab.com/pycqa/flake8.git
-   rev: 3.8.3
+   rev: 5.0.4
    hooks:
      - id: flake8
  - repo: https://github.com/PyCQA/isort
@@ -8,11 +8,11 @@ repos:
    hooks:
      - id: isort
  - repo: https://github.com/pre-commit/mirrors-yapf
-   rev: v0.30.0
+   rev: v0.32.0
    hooks:
      - id: yapf
  - repo: https://github.com/pre-commit/pre-commit-hooks
-   rev: v3.1.0
+   rev: v4.3.0
    hooks:
      - id: trailing-whitespace
      - id: check-yaml
@@ -34,7 +34,7 @@ repos:
        - mdformat_frontmatter
        - linkify-it-py
  - repo: https://github.com/codespell-project/codespell
-   rev: v2.1.0
+   rev: v2.2.1
    hooks:
      - id: codespell
  - repo: https://github.com/myint/docformatter
@@ -52,6 +52,11 @@ repos:
        language: python
        files: ^configs/.*\.md$
        require_serial: true
+ - repo: https://github.com/asottile/pyupgrade
+   rev: v3.0.0
+   hooks:
+     - id: pyupgrade
+       args: ["--py36-plus"]
  - repo: https://github.com/open-mmlab/pre-commit-hooks
    rev: v0.2.0  # Use the rev to fix revision
    hooks:
README.md
@@ -62,12 +62,11 @@ The 1.x branch works with **PyTorch 1.6+**.

## What's New

-v1.0.0rc0 was released in 31/8/2022.
+v1.0.0rc1 was released in 2/11/2022.
Please refer to [changelog.md](docs/en/notes/changelog.md) for details and release history.

-- Unifies interfaces of all components based on MMEngine.
-- Faster training and testing speed with complete support of mixed precision training.
-- Refactored and more flexible architecture.
+- Support PoolFormer ([#2191](https://github.com/open-mmlab/mmsegmentation/pull/2191))
+- Add Decathlon dataset ([#2227](https://github.com/open-mmlab/mmsegmentation/pull/2227))

## Installation

@@ -102,6 +101,7 @@ Supported backbones:
- [x] [BEiT (ICLR'2022)](configs/beit)
- [x] [ConvNeXt (CVPR'2022)](configs/convnext)
- [x] [MAE (CVPR'2022)](configs/mae)
+- [x] [PoolFormer (CVPR'2022)](configs/poolformer)

Supported methods:

@@ -198,6 +198,7 @@ This project is released under the [Apache 2.0 license](LICENSE).
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
+- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark.
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
README_zh-CN.md
@@ -61,7 +61,7 @@ MMSegmentation is an open-source semantic segmentation toolbox based on PyTorch and part of the O…

## Changelog

-The latest version, v1.0.0rc0, was released on 2022.8.31.
+The latest version, v1.0.0rc1, was released on 2022.11.2.
For more version details and release history, please read the [changelog](docs/en/notes/changelog.md).

## Installation

@@ -96,6 +96,7 @@ MMSegmentation is an open-source semantic segmentation toolbox based on PyTorch and part of the O…
- [x] [BEiT (ICLR'2022)](configs/beit)
- [x] [ConvNeXt (CVPR'2022)](configs/convnext)
- [x] [MAE (CVPR'2022)](configs/mae)
+- [x] [PoolFormer (CVPR'2022)](configs/poolformer)

Supported methods:

@@ -189,6 +190,7 @@ MMSegmentation is an open-source project jointly developed by researchers and engineers from various universities and companies…
- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark
- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection
- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark
+- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark
- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox
- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab toolbox for end-to-end text detection, recognition and understanding
- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox
configs/_base_/models/fpn_poolformer_s12.py (new file; 50 lines)
@@ -0,0 +1,50 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth'  # noqa
custom_imports = dict(imports='mmcls.models', allow_failed_imports=False)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='mmcls.PoolFormer',
        arch='s12',
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'),
        in_patch_size=7,
        in_stride=4,
        in_pad=2,
        down_patch_size=3,
        down_stride=2,
        down_pad=1,
        drop_rate=0.,
        drop_path_rate=0.,
        out_indices=(0, 2, 4, 6),
        frozen_stages=0,
    ),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/deeplabv3/README.md
@@ -55,7 +55,7 @@ In this work, we revisit atrous convolution, a powerful tool to explicitly adjus…
| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | 76.26 | 77.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) |
| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | 79.63 | 80.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) |
| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 | 80.01 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) |
-| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 76.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) |
+| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 75.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) |
| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | 78.80 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) |
| DeepLabV3 | R-101b-D8 | 769x769 | 80000 | 10.7 | 0.82 | 79.41 | 80.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) |
configs/deeplabv3/deeplabv3.yml
@@ -326,7 +326,7 @@ Models:
  - Task: Semantic Segmentation
    Dataset: Cityscapes
    Metrics:
-     mIoU: 76.63
+     mIoU: 75.63
      mIoU(ms+flip): 77.51
  Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth
@@ -7,6 +7,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320),
    auxiliary_head=dict(in_channels=96))
@@ -7,6 +7,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320),
    auxiliary_head=dict(in_channels=96))
@@ -9,6 +9,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320, c1_in_channels=24),
    auxiliary_head=dict(in_channels=96))
@@ -7,6 +7,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320, c1_in_channels=24),
    auxiliary_head=dict(in_channels=96))
@@ -7,6 +7,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320),
    auxiliary_head=dict(in_channels=96))
@@ -7,6 +7,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320),
    auxiliary_head=dict(in_channels=96))
@@ -7,6 +7,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320),
    auxiliary_head=dict(in_channels=96))
@@ -7,6 +7,7 @@ model = dict(
        widen_factor=1.,
        strides=(1, 2, 2, 1, 1, 1, 1),
        dilations=(1, 1, 1, 2, 2, 4, 4),
-       out_indices=(1, 2, 4, 6)),
+       out_indices=(1, 2, 4, 6),
+       norm_cfg=dict(type='SyncBN', requires_grad=True)),
    decode_head=dict(in_channels=320),
    auxiliary_head=dict(in_channels=96))
configs/poolformer/README.md (new file; 65 lines)
@@ -0,0 +1,65 @@
# PoolFormer

[MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418)

## Introduction

<!-- [BACKBONE] -->

<a href="https://github.com/sail-sg/poolformer/tree/main/segmentation">Official Repo</a>

<a href="https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198">Code Snippet</a>

## Abstract

<!-- [ABSTRACT] -->

Transformers have shown great potential in computer vision tasks. A common belief is their attention-based token mixer module contributes most to their competence. However, recent works show the attention-based module in transformers can be replaced by spatial MLPs and the resulted models still perform quite well. Based on this observation, we hypothesize that the general architecture of the transformers, instead of the specific token mixer module, is more essential to the model's performance. To verify this, we deliberately replace the attention module in transformers with an embarrassingly simple spatial pooling operator to conduct only the most basic token mixing. Surprisingly, we observe that the derived model, termed as PoolFormer, achieves competitive performance on multiple computer vision tasks. For example, on ImageNet-1K, PoolFormer achieves 82.1% top-1 accuracy, surpassing well-tuned vision transformer/MLP-like baselines DeiT-B/ResMLP-B24 by 0.3%/1.1% accuracy with 35%/52% fewer parameters and 48%/60% fewer MACs. The effectiveness of PoolFormer verifies our hypothesis and urges us to initiate the concept of "MetaFormer", a general architecture abstracted from transformers without specifying the token mixer. Based on the extensive experiments, we argue that MetaFormer is the key player in achieving superior results for recent transformer and MLP-like models on vision tasks. This work calls for more future research dedicated to improving MetaFormer instead of focusing on the token mixer modules. Additionally, our proposed PoolFormer could serve as a starting baseline for future MetaFormer architecture design. Code is available at [this https URL](https://github.com/sail-sg/poolformer)

<!-- [IMAGE] -->

<div align=center>
<img src="https://user-images.githubusercontent.com/15921929/144710761-1635f59a-abde-4946-984c-a2c3f22a19d2.png" width="70%"/>
</div>

## Citation

```bibtex
@inproceedings{yu2022metaformer,
  title={Metaformer is actually what you need for vision},
  author={Yu, Weihao and Luo, Mi and Zhou, Pan and Si, Chenyang and Zhou, Yichen and Wang, Xinchao and Feng, Jiashi and Yan, Shuicheng},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={10819--10829},
  year={2022}
}
```

### Usage

- The PoolFormer backbone requires [MMClassification](https://github.com/open-mmlab/mmclassification) to be installed first, which provides abundant backbones for downstream tasks.

```shell
pip install "mmcls>=1.0.0rc0"
```

- The pretrained models can also be downloaded from the [PoolFormer config of MMClassification](https://github.com/open-mmlab/mmclassification/tree/master/configs/poolformer) and referenced from the segmentation config, as the sketch below shows.
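For reference, the `_base_` config `configs/_base_/models/fpn_poolformer_s12.py` added in this commit (shown in full further below) wires the MMClassification backbone in roughly the following way; a minimal sketch with all names and the checkpoint URL taken from that config:

```python
# Minimal sketch of reusing an MMClassification backbone in MMSegmentation.
# `custom_imports` registers the mmcls model classes so that the 'mmcls.'
# type prefix below resolves; the checkpoint is the ImageNet-1K
# PoolFormer-S12 weight released by MMClassification.
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth'  # noqa
custom_imports = dict(imports='mmcls.models', allow_failed_imports=False)
model = dict(
    backbone=dict(
        type='mmcls.PoolFormer',  # backbone class registered by mmcls
        arch='s12',
        init_cfg=dict(
            type='Pretrained',
            checkpoint=checkpoint_file,
            prefix='backbone.')))  # load only the 'backbone.' sub-weights
```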
## Results and models

### ADE20K

| Method | Backbone | Crop Size | pretrain | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | mIoU\* | mIoU\*(ms+flip) | config | download |
| ------ | -------------- | --------- | ----------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------ | --------------: | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| FPN | PoolFormer-S12 | 512x512 | ImageNet-1K | 32 | 40000 | 4.17 | 23.48 | 36.68 | - | 37.07 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154.log.json) |
| FPN | PoolFormer-S24 | 512x512 | ImageNet-1K | 32 | 40000 | 5.47 | 15.74 | 40.12 | - | 40.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049.log.json) |
| FPN | PoolFormer-S36 | 512x512 | ImageNet-1K | 32 | 40000 | 6.77 | 11.34 | 41.61 | - | 41.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122-b47e607d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122.log.json) |
| FPN | PoolFormer-M36 | 512x512 | ImageNet-1K | 32 | 40000 | 8.59 | 8.97 | 41.95 | - | 42.35 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230.log.json) |
| FPN | PoolFormer-M48 | 512x512 | ImageNet-1K | 32 | 40000 | 10.48 | 6.69 | 42.43 | - | 42.76 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923.log.json) |

Note:

- We replace `AlignedResize` in the original PoolFormer implementation with `Resize + ResizeToMultiple` (see the pipeline sketch after these notes).
- `mIoU` with \* is collected when `Resize + ResizeToMultiple` is adopted in the `test_pipeline`, as is the `mIoU` reported in the training logs.
- Test-time augmentation, i.e. "ms+flip", is still under development in MMSegmentation 1.x; stay tuned!
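As context for the notes above, the following is the `test_pipeline` from `fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py` (shown below) that realizes the `Resize + ResizeToMultiple` scheme; the steps are copied from that config, with explanatory comments added:

```python
test_pipeline = [
    dict(type='LoadImageFromFile'),
    # keep-ratio resize to the (2048, 512) scale used for ADE20K
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # round height and width up to multiples of 32, replacing AlignedResize
    dict(type='ResizeToMultiple', size_divisor=32),
    # load annotations after ``Resize`` because the ground truth
    # does not need the resize transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
```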
configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py (new file)
@@ -0,0 +1,11 @@
_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m36_3rdparty_32xb128_in1k_20220414-c55e0949.pth'  # noqa

# model settings
model = dict(
    backbone=dict(
        arch='m36',
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')),
    neck=dict(in_channels=[96, 192, 384, 768]))

configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py (new file)
@@ -0,0 +1,11 @@
_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m48_3rdparty_32xb128_in1k_20220414-9378f3eb.pth'  # noqa

# model settings
model = dict(
    backbone=dict(
        arch='m48',
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')),
    neck=dict(in_channels=[96, 192, 384, 768]))

configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py (new file)
@@ -0,0 +1,91 @@
_base_ = [
    '../_base_/models/fpn_poolformer_s12.py', '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py'
]

# dataset settings
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
data_preprocessor = dict(size=crop_size)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    dict(type='ResizeToMultiple', size_divisor=32),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]

train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=50,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            data_prefix=dict(
                img_path='images/training',
                seg_map_path='annotations/training'),
            pipeline=train_pipeline)))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

# model settings
model = dict(
    data_preprocessor=data_preprocessor,
    neck=dict(in_channels=[64, 128, 320, 512]),
    decode_head=dict(num_classes=150))

# optimizer
# optimizer = dict(_delete_=True, type='AdamW', lr=0.0002, weight_decay=0.0001)
# optimizer_config = dict()
# # learning policy
# lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)
optim_wrapper = dict(
    _delete_=True,
    type='AmpOptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0002, weight_decay=0.0001))
param_scheduler = [
    dict(
        type='PolyLR',
        power=0.9,
        begin=0,
        end=40000,
        eta_min=0.0,
        by_epoch=False,
    )
]

configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py (new file)
@@ -0,0 +1,9 @@
_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s24_3rdparty_32xb128_in1k_20220414-d7055904.pth'  # noqa
# model settings
model = dict(
    backbone=dict(
        arch='s24',
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')))

configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py (new file)
@@ -0,0 +1,10 @@
_base_ = './fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py'
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s36_3rdparty_32xb128_in1k_20220414-d78ff3e8.pth'  # noqa

# model settings
model = dict(
    backbone=dict(
        arch='s36',
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')))
106
configs/poolformer/poolformer.yml
Normal file
106
configs/poolformer/poolformer.yml
Normal file
@ -0,0 +1,106 @@
Models:
- Name: fpn_poolformer_s12_8xb4-40k_ade20k-512x512
  In Collection: FPN
  Metadata:
    backbone: PoolFormer-S12
    crop size: (512,512)
    lr schd: 40000
    inference time (ms/im):
    - value: 42.59
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 4.17
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 36.68
  Config: configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth
- Name: fpn_poolformer_s24_8xb4-40k_ade20k-512x512
  In Collection: FPN
  Metadata:
    backbone: PoolFormer-S24
    crop size: (512,512)
    lr schd: 40000
    inference time (ms/im):
    - value: 63.53
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 5.47
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 40.12
  Config: configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth
- Name: ''
  In Collection: FPN
  Metadata:
    backbone: PoolFormer-S36
    crop size: (512,512)
    lr schd: 40000
    inference time (ms/im):
    - value: 88.18
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 6.77
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 41.61
  Config: ''
  Weights: ''
- Name: fpn_poolformer_m36_8xb4-40k_ade20k-512x512
  In Collection: FPN
  Metadata:
    backbone: PoolFormer-M36
    crop size: (512,512)
    lr schd: 40000
    inference time (ms/im):
    - value: 111.48
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 8.59
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 41.95
  Config: configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth
- Name: fpn_poolformer_m48_8xb4-40k_ade20k-512x512
  In Collection: FPN
  Metadata:
    backbone: PoolFormer-M48
    crop size: (512,512)
    lr schd: 40000
    inference time (ms/im):
    - value: 149.48
      hardware: V100
      backend: PyTorch
      batch size: 1
      mode: FP32
      resolution: (512,512)
    Training Memory (GB): 10.48
  Results:
  - Task: Semantic Segmentation
    Dataset: ADE20K
    Metrics:
      mIoU: 42.43
  Config: configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth
@ -77,20 +77,13 @@ using `AlignedResize`, you can change the dataset pipeline like this:
```python
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            # resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU.
            dict(type='ResizeToMultiple', size_divisor=32),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU.
    dict(type='ResizeToMultiple', size_divisor=32),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
```

@ -1,4 +1,4 @@
_base_ = './segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py'
_base_ = './segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py'

model = dict(
    decode_head=dict(
@ -63,7 +63,7 @@ This script convert the model from `PRETRAIN_PATH` and store the converted model
| SETR Naive | ViT-L | 512x512 | 16 | 160000 | 18.40 | 4.72 | 48.28 | 49.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json) |
| SETR PUP | ViT-L | 512x512 | 16 | 160000 | 19.54 | 4.50 | 48.24 | 49.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json) |
| SETR MLA | ViT-L | 512x512 | 8 | 160000 | 10.96 | - | 47.34 | 49.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json) |
| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | 47.54 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) |
| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | 47.39 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) |

### Cityscapes

@ -92,7 +92,7 @@ Models:
- Task: Semantic Segmentation
  Dataset: ADE20K
  Metrics:
    mIoU: 47.54
    mIoU: 47.39
    mIoU(ms+flip): 49.37
  Config: configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth
@ -71,6 +71,6 @@ In our default setting, pretrained models and their corresponding [original mode
| UPerNet | Swin-T | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 5.02 | 21.06 | 44.41 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json) |
| UPerNet | Swin-S | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 6.17 | 14.72 | 47.72 | 49.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 7.61 | 12.65 | 47.99 | 49.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | 50.31 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | 50.13 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-1K | 384x384 | 16 | 160000 | 8.52 | 12.10 | 48.35 | 49.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json) |
| UPerNet | Swin-B | 512x512 | ImageNet-22K | 384x384 | 16 | 160000 | - | - | 50.76 | 52.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json) |

@ -75,7 +75,7 @@ Models:
- Task: Semantic Segmentation
  Dataset: ADE20K
  Metrics:
    mIoU: 50.31
    mIoU: 50.13
    mIoU(ms+flip): 51.9
  Config: configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py
  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth
@ -7,7 +7,7 @@
"id": "view-in-github"
},
"source": [
"<a href=\"https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/master/demo/MMSegmentation_Tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
"<a href=\"https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/dev-1.x/demo/MMSegmentation_Tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
@ -68,8 +68,12 @@
"source": [
"# Install PyTorch\n",
"!conda install pytorch=1.10.0 torchvision cudatoolkit=11.1 -c pytorch\n",
"# Install mim\n",
"!pip install -U openmim\n",
"# Install mmengine\n",
"!mim install mmengine\n",
"# Install MMCV\n",
"!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10/index.html"
"!mim install 'mmcv >= 2.0.0rc1'"
]
},
{
@ -85,7 +89,7 @@
"outputs": [],
"source": [
"!rm -rf mmsegmentation\n",
"!git clone https://github.com/open-mmlab/mmsegmentation.git \n",
"!git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git \n",
"%cd mmsegmentation\n",
"!pip install -e ."
]
@ -111,110 +115,15 @@
"print(mmseg.__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eUcuC3dUv32I"
},
"source": [
"## Run Inference with MMSeg trained weight"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2hd41IGaiNet",
"outputId": "b7b2aafc-edf2-43e4-ea43-0b5dd0aa4b4a"
},
"outputs": [],
"source": [
"!mkdir checkpoints\n",
"!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P checkpoints"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "H8Fxg8i-wHJE"
},
"outputs": [],
"source": [
"from mmseg.apis import inference_model, init_model, show_result_pyplot\n",
"from mmseg.utils import get_palette"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "umk8sJ0Xuace"
},
"outputs": [],
"source": [
"config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n",
"checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nWlQFuTgudxu",
"outputId": "5e45f4f6-5bcf-4d04-bb9c-0428ee84a576"
},
"outputs": [],
"source": [
"# build the model from a config file and a checkpoint file\n",
"model = init_model(config_file, checkpoint_file, device='cuda:0')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "izFv6pSRujk9"
},
"outputs": [],
"source": [
"# test a single image\n",
"img = 'demo/demo.png'\n",
"result = inference_model(model, img)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 504
},
"id": "bDcs9udgunQK",
"outputId": "7c55f713-4085-47fd-fa06-720a321d0795"
},
"outputs": [],
"source": [
"# show the results\n",
"show_result_pyplot(model, img, result, get_palette('cityscapes'))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ta51clKX4cwM"
},
"source": [
"## Train a semantic segmentation model on a new dataset\n",
"## Finetune a semantic segmentation model on a new dataset\n",
"\n",
"To train on a customized dataset, the following steps are necessary. \n",
"To finetune on a customized dataset, the following steps are necessary. \n",
"1. Add a new dataset class. \n",
"2. Create a config file accordingly. \n",
"3. Perform training and evaluation. "
@ -268,8 +177,10 @@
"source": [
"# Let's take a look at the dataset\n",
"import mmcv\n",
"import mmengine\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
"plt.figure(figsize=(8, 6))\n",
"plt.imshow(mmcv.bgr2rgb(img))\n",
@ -293,18 +204,30 @@
},
"outputs": [],
"source": [
"import os.path as osp\n",
"import numpy as np\n",
"from PIL import Image\n",
"# convert dataset annotation to semantic segmentation map\n",
"# define dataset root and directory for images and annotations\n",
"data_root = 'iccv09Data'\n",
"img_dir = 'images'\n",
"ann_dir = 'labels'\n",
"# define class and plaette for better visualization\n",
"# define class and palette for better visualization\n",
"classes = ('sky', 'tree', 'road', 'grass', 'water', 'bldg', 'mntn', 'fg obj')\n",
"palette = [[128, 128, 128], [129, 127, 38], [120, 69, 125], [53, 125, 34], \n",
"           [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]\n",
"for file in mmcv.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
"           [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WnGZfribFHCx"
},
"outputs": [],
"source": [
"import os.path as osp\n",
"import numpy as np\n",
"from PIL import Image\n",
"\n",
"# convert dataset annotation to semantic segmentation map\n",
"for file in mmengine.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
"  seg_map = np.loadtxt(osp.join(data_root, ann_dir, file)).astype(np.uint8)\n",
"  seg_img = Image.fromarray(seg_map).convert('P')\n",
"  seg_img.putpalette(np.array(palette, dtype=np.uint8))\n",
@ -351,8 +274,8 @@
"source": [
"# split train/val set randomly\n",
"split_dir = 'splits'\n",
"mmcv.mkdir_or_exist(osp.join(data_root, split_dir))\n",
"filename_list = [osp.splitext(filename)[0] for filename in mmcv.scandir(\n",
"mmengine.mkdir_or_exist(osp.join(data_root, split_dir))\n",
"filename_list = [osp.splitext(filename)[0] for filename in mmengine.scandir(\n",
"    osp.join(data_root, ann_dir), suffix='.png')]\n",
"with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:\n",
"  # select first 4/5 as train set\n",
@ -380,18 +303,15 @@
},
"outputs": [],
"source": [
"from mmseg.datasets.builder import DATASETS\n",
"from mmseg.datasets.custom import BaseSegDataset\n",
"from mmseg.registry import DATASETS\n",
"from mmseg.datasets import BaseSegDataset\n",
"\n",
"\n",
"@DATASETS.register_module()\n",
"class StanfordBackgroundDataset(BaseSegDataset):\n",
"  CLASSES = classes\n",
"  PALETTE = palette\n",
"  def __init__(self, split, **kwargs):\n",
"    super().__init__(img_suffix='.jpg', seg_map_suffix='.png', \n",
"                     split=split, **kwargs)\n",
"    assert osp.exists(self.img_dir) and self.split is not None\n",
"\n",
"  METAINFO = dict(classes = classes, palette = palette)\n",
"  def __init__(self, **kwargs):\n",
"    super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)\n",
"    "
]
},
@ -405,6 +325,16 @@
"In the next step, we need to modify the config for the training. To accelerate the process, we finetune the model from trained weights."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download config and checkpoint files\n",
"!mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest ."
]
},
{
"cell_type": "code",
"execution_count": null,
@ -413,8 +343,9 @@
},
"outputs": [],
"source": [
"from mmcv import Config\n",
"cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')"
"from mmengine import Config\n",
"cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py')\n",
"print(f'Config:\\n{cfg.pretty_text}')"
]
},
{
@ -438,10 +369,10 @@
},
"outputs": [],
"source": [
"from mmseg.apis import set_random_seed\n",
"\n",
"# Since we use only one GPU, BN is used instead of SyncBN\n",
"cfg.norm_cfg = dict(type='BN', requires_grad=True)\n",
"cfg.crop_size = (256, 256)\n",
"cfg.model.data_preprocessor.size = cfg.crop_size\n",
"cfg.model.backbone.norm_cfg = cfg.norm_cfg\n",
"cfg.model.decode_head.norm_cfg = cfg.norm_cfg\n",
"cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg\n",
@ -453,79 +384,55 @@
"cfg.dataset_type = 'StanfordBackgroundDataset'\n",
"cfg.data_root = data_root\n",
"\n",
"cfg.data.samples_per_gpu = 8\n",
"cfg.data.workers_per_gpu=8\n",
"cfg.train_dataloader.batch_size = 8\n",
"\n",
"cfg.img_norm_cfg = dict(\n",
"    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n",
"cfg.crop_size = (256, 256)\n",
"cfg.train_pipeline = [\n",
"    dict(type='LoadImageFromFile'),\n",
"    dict(type='LoadAnnotations'),\n",
"    dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),\n",
"    dict(type='RandomResize', scale=(320, 240), ratio_range=(0.5, 2.0), keep_ratio=True),\n",
"    dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),\n",
"    dict(type='RandomFlip', flip_ratio=0.5),\n",
"    dict(type='PhotoMetricDistortion'),\n",
"    dict(type='Normalize', **cfg.img_norm_cfg),\n",
"    dict(type='Pad', size=cfg.crop_size, pad_val=0, seg_pad_val=255),\n",
"    dict(type='DefaultFormatBundle'),\n",
"    dict(type='Collect', keys=['img', 'gt_semantic_seg']),\n",
"    dict(type='RandomFlip', prob=0.5),\n",
"    dict(type='PackSegInputs')\n",
"]\n",
"\n",
"cfg.test_pipeline = [\n",
"    dict(type='LoadImageFromFile'),\n",
"    dict(\n",
"        type='MultiScaleFlipAug',\n",
"        img_scale=(320, 240),\n",
"        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],\n",
"        flip=False,\n",
"        transforms=[\n",
"            dict(type='Resize', keep_ratio=True),\n",
"            dict(type='RandomFlip'),\n",
"            dict(type='Normalize', **cfg.img_norm_cfg),\n",
"            dict(type='ImageToTensor', keys=['img']),\n",
"            dict(type='Collect', keys=['img']),\n",
"        ])\n",
"    dict(type='Resize', scale=(320, 240), keep_ratio=True),\n",
"    # add loading annotation after ``Resize`` because ground truth\n",
"    # does not need to do resize data transform\n",
"    dict(type='LoadAnnotations'),\n",
"    dict(type='PackSegInputs')\n",
"]\n",
"\n",
"\n",
"cfg.data.train.type = cfg.dataset_type\n",
"cfg.data.train.data_root = cfg.data_root\n",
"cfg.data.train.img_dir = img_dir\n",
"cfg.data.train.ann_dir = ann_dir\n",
"cfg.data.train.pipeline = cfg.train_pipeline\n",
"cfg.data.train.split = 'splits/train.txt'\n",
"cfg.train_dataloader.dataset.type = cfg.dataset_type\n",
"cfg.train_dataloader.dataset.data_root = cfg.data_root\n",
"cfg.train_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
"cfg.train_dataloader.dataset.pipeline = cfg.train_pipeline\n",
"cfg.train_dataloader.dataset.ann_file = 'splits/train.txt'\n",
"\n",
"cfg.data.val.type = cfg.dataset_type\n",
"cfg.data.val.data_root = cfg.data_root\n",
"cfg.data.val.img_dir = img_dir\n",
"cfg.data.val.ann_dir = ann_dir\n",
"cfg.data.val.pipeline = cfg.test_pipeline\n",
"cfg.data.val.split = 'splits/val.txt'\n",
"cfg.val_dataloader.dataset.type = cfg.dataset_type\n",
"cfg.val_dataloader.dataset.data_root = cfg.data_root\n",
"cfg.val_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
"cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline\n",
"cfg.val_dataloader.dataset.ann_file = 'splits/val.txt'\n",
"\n",
"cfg.data.test.type = cfg.dataset_type\n",
"cfg.data.test.data_root = cfg.data_root\n",
"cfg.data.test.img_dir = img_dir\n",
"cfg.data.test.ann_dir = ann_dir\n",
"cfg.data.test.pipeline = cfg.test_pipeline\n",
"cfg.data.test.split = 'splits/val.txt'\n",
"cfg.test_dataloader = cfg.val_dataloader\n",
"\n",
"# We can still use the pre-trained Mask RCNN model though we do not need to\n",
"# use the mask branch\n",
"cfg.load_from = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
"\n",
"# Load the pretrained weights\n",
"cfg.load_from = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
"\n",
"# Set up working dir to save files and logs.\n",
"cfg.work_dir = './work_dirs/tutorial'\n",
"\n",
"cfg.runner.max_iters = 200\n",
"cfg.log_config.interval = 10\n",
"cfg.evaluation.interval = 200\n",
"cfg.checkpoint_config.interval = 200\n",
"cfg.train_cfg.max_iters = 200\n",
"cfg.train_cfg.val_interval = 200\n",
"cfg.default_hooks.logger.interval = 10\n",
"cfg.default_hooks.checkpoint.interval = 200\n",
"\n",
"# Set seed to facitate reproducing the result\n",
"cfg.seed = 0\n",
"set_random_seed(0, deterministic=False)\n",
"cfg.gpu_ids = range(1)\n",
"# Set seed to facilitate reproducing the result\n",
"cfg['randomness'] = dict(seed=0)\n",
"\n",
"# Let's have a look at the final config used for training\n",
"print(f'Config:\\n{cfg.pretty_text}')"
@ -552,23 +459,23 @@
},
"outputs": [],
"source": [
"from mmseg.datasets import build_dataset\n",
"from mmseg.models import build_segmentor\n",
"from mmseg.apis import train_segmentor\n",
"from mmengine.runner import Runner\n",
"from mmseg.utils import register_all_modules\n",
"\n",
"\n",
"# Build the dataset\n",
"datasets = [build_dataset(cfg.data.train)]\n",
"\n",
"# Build the detector\n",
"model = build_segmentor(cfg.model)\n",
"# Add an attribute for visualization convenience\n",
"model.CLASSES = datasets[0].CLASSES\n",
"\n",
"# Create work_dir\n",
"mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n",
"train_segmentor(model, datasets, cfg, distributed=False, validate=True, \n",
"                meta=dict())"
"# register all modules in mmseg into the registries\n",
"# do not init the default scope here because it will be init in the runner\n",
"register_all_modules(init_default_scope=False)\n",
"runner = Runner.from_cfg(cfg)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# start training\n",
"runner.train()"
]
},
{
@ -593,20 +500,17 @@
},
"outputs": [],
"source": [
"img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
"from mmseg.apis import inference_model, show_result_pyplot\n",
"\n",
"model.cfg = cfg\n",
"model=runner.model\n",
"model.cfg=cfg\n",
"\n",
"img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
"result = inference_model(model, img)\n",
"plt.figure(figsize=(8, 6))\n",
"show_result_pyplot(model, img, result, palette)"
"vis_result = show_result_pyplot(model, img, result, palette)\n",
"plt.imshow(mmcv.bgr2rgb(vis_result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -618,7 +522,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.7.13 ('pt1.12')",
"language": "python",
"name": "python3"
},
@ -632,7 +536,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
"version": "3.7.13"
},
"pycharm": {
"stem_cell": {
@ -642,6 +546,11 @@
},
"source": []
}
},
"vscode": {
"interpreter": {
"hash": "ffdb7915c29738c259ec7ee5d0d1b9253c264f1fd267d45dd77f1a420396c120"
}
}
},
"nbformat": 4,
@ -21,6 +21,8 @@
"outputs": [],
"source": [
"import torch\n",
"import mmcv\n",
"import matplotlib.pyplot as plt\n",
"from mmengine.model.utils import revert_sync_batchnorm\n",
"from mmseg.apis import init_model, inference_model, show_result_pyplot\n",
"from mmseg.utils import register_all_modules\n",
@ -71,7 +73,8 @@
"outputs": [],
"source": [
"# show the results\n",
"show_result_pyplot(model, img, result)"
"vis_result = show_result_pyplot(model, img, result)\n",
"plt.imshow(mmcv.bgr2rgb(vis_result))"
]
},
{
@ -1,6 +1,7 @@
ARG PYTORCH="1.11.0"
ARG CUDA="11.3"
ARG CUDNN="8"
ARG MMCV="2.0.0rc1"

FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel

@ -22,10 +23,12 @@ RUN conda clean --all
ARG PYTORCH
ARG CUDA
ARG MMCV
RUN ["/bin/bash", "-c", "pip install --no-cache-dir mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
RUN ["/bin/bash", "-c", "pip install openmim"]
RUN ["/bin/bash", "-c", "mim install mmengine"]
RUN ["/bin/bash", "-c", "mim install mmcv==${MMCV}"]

# Install MMSegmentation
RUN git clone https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation
RUN git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation
WORKDIR /mmsegmentation
ENV FORCE_CUDA="1"
RUN pip install -r requirements.txt
@ -3,8 +3,8 @@ ARG CUDA="11.3"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel

ARG MMCV="1.4.8"
ARG MMSEG="0.24.1"
ARG MMCV="2.0.0rc1"
ARG MMSEG="1.0.0rc1"

ENV PYTHONUNBUFFERED TRUE

@ -26,7 +26,9 @@ RUN pip install torchserve torch-model-archiver
# MMLAB
ARG PYTORCH
ARG CUDA
RUN ["/bin/bash", "-c", "pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
RUN ["/bin/bash", "-c", "pip install openmim"]
RUN ["/bin/bash", "-c", "mim install mmengine"]
RUN ["/bin/bash", "-c", "mim install mmcv==${MMCV}"]
RUN pip install mmsegmentation==${MMSEG}

RUN useradd -m model-server \
260
docs/en/advanced_guides/add_models.md
Normal file
@ -0,0 +1,260 @@
# Add New Modules

## Develop new components

We can customize all the components introduced at [the model documentation](./models.md), such as **backbone**, **head**, **loss function** and **data preprocessor**.

### Add new backbones

Here we show how to develop a new backbone with an example of MobileNet.

1. Create a new file `mmseg/models/backbones/mobilenet.py`.

   ```python
   import torch.nn as nn

   from mmseg.registry import MODELS


   @MODELS.register_module()
   class MobileNet(nn.Module):

       def __init__(self, arg1, arg2):
           pass

       def forward(self, x):  # should return a tuple
           pass

       def init_weights(self, pretrained=None):
           pass
   ```

2. Import the module in `mmseg/models/backbones/__init__.py`.

   ```python
   from .mobilenet import MobileNet
   ```

3. Use it in your config file.

   ```python
   model = dict(
       ...
       backbone=dict(
           type='MobileNet',
           arg1=xxx,
           arg2=xxx),
       ...
   ```

### Add new heads

In MMSegmentation, we provide a [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/decode_heads/decode_head.py#L17) for developing all segmentation heads.
All newly implemented decode heads should be derived from it.
Here we show how to develop a new head with the example of [PSPNet](https://arxiv.org/abs/1612.01105) as follows.

First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`.
PSPNet implements a decode head for segmentation decoding.
To implement a decode head, we need to implement the following three functions of the new module.

```python
from mmseg.registry import MODELS

@MODELS.register_module()
class PSPHead(BaseDecodeHead):

    def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
        super(PSPHead, self).__init__(**kwargs)

    def init_weights(self):
        pass

    def forward(self, inputs):
        pass
```

Next, the users need to add the module in `mmseg/models/decode_heads/__init__.py`, so that the corresponding registry can find and load it.
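
For example, a minimal sketch of the required import line (assuming the head lives in `psp_head.py` as above; the real `__init__.py` also lists the class in its `__all__`):

```python
from .psp_head import PSPHead
```
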
The config file of PSPNet is as follows:

```python
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))

```

|
||||
|
||||
Assume you want to add a new loss as `MyLoss` for segmentation decode.
|
||||
To add a new loss function, the users need to implement it in `mmseg/models/losses/my_loss.py`.
|
||||
The decorator `weighted_loss` enables the loss to be weighted for each element.
|
||||
|
||||
```python
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from mmseg.registry import MODELS
|
||||
from .utils import weighted_loss
|
||||
|
||||
@weighted_loss
|
||||
def my_loss(pred, target):
|
||||
assert pred.size() == target.size() and target.numel() > 0
|
||||
loss = torch.abs(pred - target)
|
||||
return loss
|
||||
|
||||
@MODELS.register_module()
|
||||
class MyLoss(nn.Module):
|
||||
|
||||
def __init__(self, reduction='mean', loss_weight=1.0):
|
||||
super(MyLoss, self).__init__()
|
||||
self.reduction = reduction
|
||||
self.loss_weight = loss_weight
|
||||
|
||||
def forward(self,
|
||||
pred,
|
||||
target,
|
||||
weight=None,
|
||||
avg_factor=None,
|
||||
reduction_override=None):
|
||||
assert reduction_override in (None, 'none', 'mean', 'sum')
|
||||
reduction = (
|
||||
reduction_override if reduction_override else self.reduction)
|
||||
loss = self.loss_weight * my_loss(
|
||||
pred, target, weight, reduction=reduction, avg_factor=avg_factor)
|
||||
return loss
|
||||
```
|
||||
|
||||
Then the users need to add it in the `mmseg/models/losses/__init__.py`.
|
||||
|
||||
```python
|
||||
from .my_loss import MyLoss, my_loss
|
||||
|
||||
```
|
||||
|
||||
To use it, modify the `loss_xxx` field.
|
||||
Then you need to modify the `loss_decode` field in the head.
|
||||
`loss_weight` could be used to balance multiple losses.
|
||||
|
||||
```python
|
||||
loss_decode=dict(type='MyLoss', loss_weight=1.0))
|
||||
```
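
Since `loss_decode` also accepts a list of loss configs, a hedged sketch of balancing the new loss against the default cross-entropy loss (the weights here are illustrative, not tuned values):

```python
loss_decode=[
    dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
    dict(type='MyLoss', loss_weight=0.5)
]
```
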
### Add new data preprocessor

In MMSegmentation 1.x versions, we use [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/data_preprocessor.py#L13) to copy data to the target device and preprocess the data into the model input format as default. Here we show how to develop a new data preprocessor.

1. Create a new file `mmseg/models/my_datapreprocessor.py`.

   ```python
   from typing import Any, Dict

   from mmengine.model import BaseDataPreprocessor

   from mmseg.registry import MODELS


   @MODELS.register_module()
   class MyDataPreProcessor(BaseDataPreprocessor):

       def __init__(self, **kwargs):
           super().__init__(**kwargs)

       def forward(self, data: dict, training: bool = False) -> Dict[str, Any]:
           # TODO Define the logic for data pre-processing in the forward method
           pass
   ```

2. Import your data preprocessor in `mmseg/models/__init__.py`.

   ```python
   from .my_datapreprocessor import MyDataPreProcessor
   ```

3. Use it in your config file.

   ```python
   model = dict(
       data_preprocessor=dict(type='MyDataPreProcessor'),
       ...
   )
   ```
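
For reference, the default `SegDataPreProcessor` is configured through the same field; a typical setup (the values here are illustrative) carries the normalization statistics and padding behavior that the preprocessor applies on-device:

```python
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=(512, 512))
```
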
## Develop new segmentors

The segmentor is an algorithmic architecture in which users can customize their algorithms by adding customized components and defining the logic of algorithm execution. Please refer to [the model document](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/advanced_guides/models.md) for more details.

Since the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/segmentors/base.py#L15) in MMSegmentation unifies three modes for a forward process, to develop a new segmentor, users need to overwrite `loss`, `predict` and `_forward` methods corresponding to the `loss`, `predict` and `tensor` modes.

Here we show how to develop a new segmentor.

1. Create a new file `mmseg/models/segmentors/my_segmentor.py`.

   ```python
   from typing import Dict, List, Optional, Tuple, Union

   from torch import Tensor

   from mmseg.registry import MODELS
   from mmseg.models import BaseSegmentor
   from mmseg.utils import OptSampleList, SampleList

   @MODELS.register_module()
   class MySegmentor(BaseSegmentor):
       def __init__(self, **kwargs):
           super().__init__(**kwargs)
           # TODO users should build components of the network here

       def loss(self, inputs: Tensor, data_samples: SampleList) -> dict:
           """Calculate losses from a batch of inputs and data samples."""
           pass

       def predict(self, inputs: Tensor, data_samples: OptSampleList = None) -> SampleList:
           """Predict results from a batch of inputs and data samples with post-
           processing."""
           pass

       def _forward(self,
                    inputs: Tensor,
                    data_samples: OptSampleList = None) -> Tuple[List[Tensor]]:
           """Network forward process.

           Usually includes backbone, neck and head forward without any post-
           processing.
           """
           pass
   ```

2. Import your segmentor in `mmseg/models/segmentors/__init__.py`.

   ```python
   from .my_segmentor import MySegmentor
   ```

3. Use it in your config file.

   ```python
   model = dict(
       type='MySegmentor',
       ...
   )
   ```
@ -1,234 +0,0 @@
# Add New Modules

## Customize optimizer

Assume you want to add a optimizer named as `MyOptimizer`, which has arguments `a`, `b`, and `c`.
You need to first implement the new optimizer in a file, e.g., in `mmseg/engine/optimizers/my_optimizer.py`:

```python
from mmcv.runner import OPTIMIZERS
from torch.optim import Optimizer


@OPTIMIZERS.register_module
class MyOptimizer(Optimizer):

    def __init__(self, a, b, c)

```

Then add this module in `mmseg/engine/optimizers/__init__.py` thus the registry will
find the new module and add it:

```python
from .my_optimizer import MyOptimizer
```

Then you can use `MyOptimizer` in `optimizer` field of config files.
In the configs, the optimizers are defined by the field `optimizer` like the following:

```python
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
```

To use your own optimizer, the field can be changed as

```python
optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
```

We already support to use all the optimizers implemented by PyTorch, and the only modification is to change the `optimizer` field of config files.
For example, if you want to use `ADAM`, though the performance will drop a lot, the modification could be as the following.

```python
optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
```

The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.

## Customize optimizer constructor

Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNoarm layers.
The users can do those fine-grained parameter tuning through customizing optimizer constructor.

```python
from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS
from .cocktail_optimizer import CocktailOptimizer


@OPTIM_WRAPPER_CONSTRUCTORS.register_module
class CocktailOptimizerConstructor(object):

    def __init__(self, optim_wrapper_cfg, paramwise_cfg=None):

    def __call__(self, model):

        return my_optimizer

```

## Develop new components

There are mainly 2 types of components in MMSegmentation.

- backbone: usually stacks of convolutional network to extract feature maps, e.g., ResNet, HRNet.
- head: the component for semantic segmentation map decoding.

### Add new backbones

Here we show how to develop new components with an example of MobileNet.

1. Create a new file `mmseg/models/backbones/mobilenet.py`.

```python
import torch.nn as nn

from mmseg.registry import MODELS


@MODELS.register_module
class MobileNet(nn.Module):

    def __init__(self, arg1, arg2):
        pass

    def forward(self, x):  # should return a tuple
        pass

    def init_weights(self, pretrained=None):
        pass
```

2. Import the module in `mmseg/models/backbones/__init__.py`.

```python
from .mobilenet import MobileNet
```

3. Use it in your config file.

```python
model = dict(
    ...
    backbone=dict(
        type='MobileNet',
        arg1=xxx,
        arg2=xxx),
    ...
```

### Add new heads

In MMSegmentation, we provide a base [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py) for all segmentation head.
All newly implemented decode heads should be derived from it.
Here we show how to develop a new head with the example of [PSPNet](https://arxiv.org/abs/1612.01105) as the following.

First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`.
PSPNet implements a decode head for segmentation decode.
To implement a decode head, basically we need to implement three functions of the new module as the following.

```python
from mmseg.registry import MODELS

@MODELS.register_module()
class PSPHead(BaseDecodeHead):

    def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
        super(PSPHead, self).__init__(**kwargs)

    def init_weights(self):

    def forward(self, inputs):

```

Next, the users need to add the module in the `mmseg/models/decode_heads/__init__.py` thus the corresponding registry could find and load them.

To config file of PSPNet is as the following

```python
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))

```

### Add new loss

Assume you want to add a new loss as `MyLoss` for segmentation decode.
To add a new loss function, the users need implement it in `mmseg/models/losses/my_loss.py`.
The decorator `weighted_loss` enable the loss to be weighted for each element.

```python
import torch
import torch.nn as nn

from mmseg.registry import MODELS
from .utils import weighted_loss

@weighted_loss
def my_loss(pred, target):
    assert pred.size() == target.size() and target.numel() > 0
    loss = torch.abs(pred - target)
    return loss


@LOSSES.register_module
class MyLoss(nn.Module):

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(MyLoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss = self.loss_weight * my_loss(
            pred, target, weight, reduction=reduction, avg_factor=avg_factor)
        return loss
```

Then the users need to add it in the `mmseg/models/losses/__init__.py`.

```python
from .my_loss import MyLoss, my_loss

```

To use it, modify the `loss_xxx` field.
Then you need to modify the `loss_decode` field in the head.
`loss_weight` could be used to balance multiple losses.

```python
loss_decode=dict(type='MyLoss', loss_weight=1.0))
```
@ -1 +1,158 @@
# Evaluation

The evaluation procedure is executed in [ValLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L300) and [TestLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L373); users can evaluate model performance during training or using the test script with simple settings in the configuration file. The `ValLoop` and `TestLoop` are properties of [Runner](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L59), and they are built the first time they are called. To build the `ValLoop` successfully, the `val_dataloader` and `val_evaluator` must be set when building the `Runner`, since `dataloader` and `evaluator` are required parameters; the same goes for `TestLoop`. For more information about the Runner's design, please refer to the [documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md) of [MMEngine](https://github.com/open-mmlab/mmengine).

<center>
<img src='../../../resources/test_step.png' />
<center>test_step/val_step dataflow</center>
</center>
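
As a rough sketch of what "must be set when building `Runner`" means in practice (the keyword arguments follow MMEngine's `Runner`; the config values are placeholders, not recommendations):

```python
from mmengine.runner import Runner

runner = Runner(
    model=model,
    work_dir='./work_dirs/demo',
    train_dataloader=train_dataloader,
    optim_wrapper=optim_wrapper,
    train_cfg=dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000),
    # without the three val_* arguments below, building the ValLoop would fail
    val_dataloader=val_dataloader,
    val_evaluator=val_evaluator,
    val_cfg=dict(type='ValLoop'))
runner.train()
```
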
In MMSegmentation, we write the settings of the dataloader and metrics in the config files of datasets, and the configuration of the evaluation loop in the `schedule_x` config files by default.

For example, in the ADE20K config file `configs/_base_/dataset/ade20k.py`, on lines 37 to 48 we configure the `val_dataloader`, and on line 51 we select `IoUMetric` as the evaluator and set `mIoU` as the metric:

```python
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
```

To be able to evaluate the model during training, for example, we add the evaluation configuration to the file `configs/schedules/schedule_40k.py` on lines 15 to 16:

```python
train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
val_cfg = dict(type='ValLoop')
```

With the above two settings, MMSegmentation evaluates the **mIoU** metric of the model once every 4000 iterations during the training of 40K iterations.

If we would like to test the model after training, we need to add the `test_dataloader`, `test_evaluator` and `test_cfg` configs to the config file.

```python
test_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))

test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_cfg = dict(type='TestLoop')
```

In MMSegmentation, the settings of `test_dataloader` and `test_evaluator` are the same as the `ValLoop`'s dataloader and evaluator by default; we can modify these settings to meet our needs.
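
For instance, a minimal override that keeps validation unchanged but also reports Dice at test time (the metric names follow the `iou_metrics` options documented below; the combination is illustrative):

```python
test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU', 'mDice'])
```
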
## IoUMetric

MMSegmentation implements [IoUMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/iou_metric.py) and [CitysMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/citys_metric.py) for evaluating the performance of models, based on the [BaseMetric](https://github.com/open-mmlab/mmengine/blob/main/mmengine/evaluator/metric.py) provided by [MMEngine](https://github.com/open-mmlab/mmengine). Please refer to [the documentation](https://mmengine.readthedocs.io/en/latest/tutorials/evaluation.html) for more details about the unified evaluation interface.

Here we briefly describe the arguments and the two main methods of `IoUMetric`.

The constructor of `IoUMetric` has some additional parameters besides the base `collect_device` and `prefix`.

The arguments of the constructor:

- ignore_index (int) - Index that will be ignored in evaluation. Default: 255.
- iou_metrics (list\[str\] | str) - Metrics to be calculated; the options include 'mIoU', 'mDice' and 'mFscore'.
- nan_to_num (int, optional) - If specified, NaN values will be replaced by the number defined by the user. Default: None.
- beta (int) - Determines the weight of recall in the combined score. Default: 1.
- collect_device (str) - Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
- prefix (str, optional) - The prefix that will be added to the metric names to disambiguate homonymous metrics of different evaluators. If the prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None.

`IoUMetric` implements the IoU metric calculation; the two core methods of `IoUMetric` are `process` and `compute_metrics`:

- The `process` method processes one batch of data and data_samples.
- The `compute_metrics` method computes the metrics from the processed results.

#### IoUMetric.process

Parameters:

- data_batch (Any) - A batch of data from the dataloader.
- data_samples (Sequence\[dict\]) - A batch of outputs from the model.

Returns:

This method does not return anything; the processed results are stored in `self.results`, which will be used to compute the metrics once all batches have been processed.

#### IoUMetric.compute_metrics

Parameters:

- results (list) - The processed results of each batch.

Returns:

- Dict\[str, float\] - The computed metrics. The keys are the names of the metrics and the values are the corresponding results. The keys mainly include **aAcc**, **mIoU**, **mAcc**, **mDice**, **mFscore**, **mPrecision** and **mRecall**.
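
For example, to also compute Dice and Fscore during testing, extend `iou_metrics` in the evaluator config (a minimal sketch based on the arguments above):

```python
test_evaluator = dict(
    type='IoUMetric',
    iou_metrics=['mIoU', 'mDice', 'mFscore'],
    # replace NaN entries (e.g. classes absent from the split) with -1
    nan_to_num=-1)
```
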
## CitysMetric

`CitysMetric` uses the official [CityscapesScripts](https://github.com/mcordts/cityscapesScripts) provided by Cityscapes to evaluate model performance.

### Usage

Before using it, please install the `cityscapesscripts` package first:

```shell
pip install cityscapesscripts
```

Since `IoUMetric` is used as the default evaluator in MMSegmentation, if you would like to use `CitysMetric`, customizing the config file is required. In your customized config file, you should overwrite the default evaluator as follows.

```python
val_evaluator = dict(type='CitysMetric', citys_metrics=['cityscapes'])
test_evaluator = val_evaluator
```

### Interface

The arguments of the constructor:

- ignore_index (int) - Index that will be ignored in evaluation. Default: 255.
- citys_metrics (list\[str\] | str) - Metrics to be evaluated. Default: \['cityscapes'\].
- to_label_id (bool) - Whether to convert output to label_id for submission. Default: True.
- suffix (str) - The filename prefix of the png files. If the prefix is "somepath/xxx", the png files will be named "somepath/xxx.png". Default: '.format_cityscapes'.
- collect_device (str) - Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
- prefix (str, optional) - The prefix that will be added to the metric names to disambiguate homonymous metrics of different evaluators. If the prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None.

#### CitysMetric.process

This method draws the masks on images and saves the painted images to `work_dir`.

Parameters:

- data_batch (Any) - A batch of data from the dataloader.
- data_samples (Sequence\[dict\]) - A batch of outputs from the model.

Returns:

This method does not return anything; the paths of the output annotation files are stored in `self.results`, which will be used to compute the metrics once all batches have been processed.

#### CitysMetric.compute_metrics

This method calls the `cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling` tool to calculate metrics.

Parameters:

- results (list) - Testing results of the dataset.

Returns:

- Dict\[str, float\] - Cityscapes evaluation results.

@ -1 +1,179 @@
# Models

We usually define a neural network in a deep learning task as a model, and this model is the core of an algorithm. [MMEngine](https://github.com/open-mmlab/mmengine) abstracts a unified model [BaseModel](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/base_model/base_model.py#L16) to standardize the interfaces for training, testing and other processes. All models implemented by MMSegmentation inherit from `BaseModel`; in MMSegmentation we implement `forward` and add some functions for the semantic segmentation algorithm.

## Common components

### Segmentor

In MMSegmentation, we abstract the network architecture as a **Segmentor**: a model that contains all components of a network. We have already implemented **EncoderDecoder** and **CascadeEncoderDecoder**, which typically consist of a **Data preprocessor**, a **Backbone**, a **Decode head** and an **Auxiliary head**.

### Data preprocessor

**Data preprocessor** is the part that copies data to the target device and preprocesses the data into the model input format.

### Backbone

**Backbone** is the part that transforms an image into feature maps, such as a **ResNet-50** without the last fully connected layer.

### Neck

**Neck** is the part that connects the backbone and heads. It performs some refinements or reconfigurations on the raw feature maps produced by the backbone. An example is the **Feature Pyramid Network (FPN)**.

### Decode Head

**Decode Head** is the part that transforms the feature maps into a segmentation mask, such as **PSPNet**.

### Auxiliary head

**Auxiliary head** is an optional component that transforms the feature maps into segmentation masks which are only used for computing auxiliary losses.

## Basic interfaces

MMSegmentation wraps `BaseModel` and implements the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/segmentors/base.py#L15) class, which mainly provides the interfaces `forward`, `train_step`, `val_step` and `test_step`. The following introduces these interfaces in detail.

### forward

<center>
  <img src='../../../resources/encoder_decoder_dataflow.png' />
  <center>EncoderDecoder dataflow</center>
</center>

<center>
  <center><img src='../../../resources/cascade_encoder_decoder_dataflow.png' /></center>
  <center>CascadeEncoderDecoder dataflow</center>
</center>

The `forward` method returns losses or predictions for training, validation, testing and simple inference.

The method should accept three modes: "tensor", "predict" and "loss":

- "tensor": Forward the whole network and return the tensor or tuple of tensors without any post-processing, the same as a common `nn.Module`.
- "predict": Forward and return the predictions, which are fully processed into a list of `SegDataSample`.
- "loss": Forward and return a `dict` of losses according to the given inputs and data samples.

**Note:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) is a data structure interface of MMSegmentation, used as an interface between different components. `SegDataSample` implements the abstract data element `mmengine.structures.BaseDataElement`; please refer to [the SegDataSample documentation](https://mmsegmentation.readthedocs.io/en/1.x/advanced_guides/structures.html) and the [data element documentation](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/data_element.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information.

Note that this method doesn't handle either backpropagation or optimizer updating, which are done in the method `train_step`.

Parameters:

- inputs (torch.Tensor) - The input tensor, in general with shape (N, C, ...).
- data_sample (list\[[SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py)\]) - The seg data samples. It usually includes information such as `metainfo` and `gt_sem_seg`. Defaults to None.
- mode (str) - What kind of value to return. Defaults to 'tensor'.

Returns:

- `dict` or `list`:
  - If `mode == "loss"`, return a `dict` of loss tensors used for backward and logging.
  - If `mode == "predict"`, return a `list` of `SegDataSample`; the inference results are incrementally added to the `data_sample` parameter passed to the forward method, and each `SegDataSample` contains the following keys:
    - pred_sem_seg (`PixelData`): Prediction of semantic segmentation.
    - seg_logits (`PixelData`): Predicted logits of semantic segmentation before normalization.
  - If `mode == "tensor"`, return a `tensor`, `tuple of tensor` or `dict of tensor` for custom use.
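
A minimal sketch of calling `forward` in the three modes (assuming `model` is a built segmentor, `inputs` is a batched image tensor and `data_samples` is a list of `SegDataSample`):

```python
losses = model(inputs, data_samples, mode='loss')    # dict of loss tensors
preds = model(inputs, data_samples, mode='predict')  # list of SegDataSample
feats = model(inputs, mode='tensor')                 # raw tensor(s), no post-processing
```
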
### prediction modes

We briefly describe the fields of the model's configuration in [the config documentation](../user_guides/1_config.md); here we elaborate on the `model.test_cfg` field. `model.test_cfg` is used to control forward behavior: the `forward` method in `"predict"` mode can run in two modes:

- `whole_inference`: If `cfg.model.test_cfg.mode == 'whole'`, the model will do inference on full images.

A `whole_inference` mode example config:

```python
model = dict(
    type='EncoderDecoder',
    ...
    test_cfg=dict(mode='whole'))
```

- `slide_inference`: If `cfg.model.test_cfg.mode == 'slide'`, the model will do inference with a sliding window. **Note:** if you select the `slide` mode, `cfg.model.test_cfg.stride` and `cfg.model.test_cfg.crop_size` should also be specified.

A `slide_inference` mode example config:

```python
model = dict(
    type='EncoderDecoder',
    ...
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
```

### train_step

The `train_step` method calls the `forward` interface in `loss` mode to get the loss `dict`. The `BaseModel` class implements the default model training process, including preprocessing, model forward propagation, loss calculation, optimization and back-propagation.

Parameters:

- data (dict or tuple or list) - Data sampled from the dataset. In MMSegmentation, the data dict contains two fields: `inputs` and `data_samples`.
- optim_wrapper (OptimWrapper) - The OptimWrapper instance used to update model parameters.

**Note:** [OptimWrapper](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/optimizer_wrapper.py#L17) provides a common interface for updating parameters; please refer to the optimizer wrapper [documentation](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/optim_wrapper.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information.

Returns:

- Dict\[str, `torch.Tensor`\]: A `dict` of tensors for logging.

<center>
  <img src='../../../resources/train_step.png' />
  <center>train_step dataflow</center>
</center>
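
A hedged sketch of what one training iteration looks like when driven through this interface (the loop itself is normally run by MMEngine's `IterBasedTrainLoop` rather than user code):

```python
for data in train_dataloader:  # data is a dict with 'inputs' and 'data_samples'
    # forward in 'loss' mode, then parameter update via the optimizer wrapper
    log_vars = model.train_step(data, optim_wrapper)
```
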
### val_step

The `val_step` method calls the `forward` interface in `predict` mode and returns the prediction result, which is further passed to the process interface of the evaluator and the `after_val_iter` interface of the Hook.

Parameters:

- data (`dict` or `tuple` or `list`) - Data sampled from the dataset. In MMSegmentation, the data dict contains two fields: `inputs` and `data_samples`.

Returns:

- `list` - The predictions of the given data.

<center>
  <img src='../../../resources/test_step.png' />
  <center>test_step/val_step dataflow</center>
</center>

### test_step

The `BaseModel` implements `test_step` the same as `val_step`.

## Data Preprocessor

The [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/data_preprocessor.py#L13) implemented by MMSegmentation inherits from the [BaseDataPreprocessor](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/base_model/data_preprocessor.py#L18) implemented by [MMEngine](https://github.com/open-mmlab/mmengine) and provides data preprocessing and the copying of data to the target device.

The runner carries the model to the specified device during the construction stage, while the data is carried to the specified device by the [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/data_preprocessor.py#L13) in `train_step`, `val_step` and `test_step`, and the processed data is further passed to the model.

The parameters of the `SegDataPreProcessor` constructor:

- mean (Sequence\[Number\], optional) - The pixel mean of R, G, B channels. Defaults to None.
- std (Sequence\[Number\], optional) - The pixel standard deviation of R, G, B channels. Defaults to None.
- size (tuple, optional) - Fixed padding size.
- size_divisor (int, optional) - The divisor of the padded size.
- pad_val (float, optional) - Padding value. Default: 0.
- seg_pad_val (float, optional) - Padding value of the segmentation map. Default: 255.
- bgr_to_rgb (bool) - Whether to convert the image from BGR to RGB. Defaults to False.
- rgb_to_bgr (bool) - Whether to convert the image from RGB to BGR. Defaults to False.
- batch_augments (list\[dict\], optional) - Batch-level augmentations. Defaults to None.

The data will be processed as follows:

- Collate and move data to the target device.
- Pad inputs to the input size with the defined `pad_val`, and pad seg maps with the defined `seg_pad_val`.
- Stack inputs to batch_inputs.
- Convert inputs from BGR to RGB if the shape of the input is (3, H, W).
- Normalize the image with the defined std and mean.
- Do batch augmentations like Mixup and Cutmix during training.

The parameters of the `forward` method:

- data (dict) - Data sampled from the dataloader.
- training (bool) - Whether to enable training-time augmentation.

The returns of the `forward` method:

- Dict: Data in the same format as the model input.
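
A typical `data_preprocessor` config then looks like the following (the mean/std values are the common ImageNet statistics used in many MMSegmentation configs; adjust them to your data):

```python
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=(512, 512))
```
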
@ -1 +1,104 @@
# Structures

To unify the input and output interfaces between different models and modules, OpenMMLab 2.0 MMEngine defines an abstract data structure.
It implements basic `Create`, `Read`, `Update` and `Delete` functions, supports data transferring among different types of devices,
and supports tensor-like or dictionary-like operations such as `.cpu()`, `.cuda()`, `.get()` and `.detach()`.
More details can be found [here](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/data_element.md).

MMSegmentation also follows this interface protocol and defines `SegDataSample`, which is used to encapsulate the data of the semantic segmentation task.

## Semantic Segmentation Data SegDataSample

[SegDataSample](mmseg.structures.SegDataSample) includes three main fields, `gt_sem_seg`, `pred_sem_seg` and `seg_logits`, which are used to store the annotation information and the prediction results.

| Field        | Type                      | Description                                |
| ------------ | ------------------------- | ------------------------------------------ |
| gt_sem_seg   | [`PixelData`](#pixeldata) | Annotation information.                    |
| pred_sem_seg | [`PixelData`](#pixeldata) | The predicted result.                      |
| seg_logits   | [`PixelData`](#pixeldata) | The raw (non-normalized) predicted result. |

The following sample code demonstrates the use of `SegDataSample`.

```python
import torch
from mmengine.structures import PixelData
from mmseg.structures import SegDataSample

img_meta = dict(img_shape=(4, 4, 3),
                pad_shape=(4, 4, 3))
data_sample = SegDataSample()
# define gt_segmentations to encapsulate the ground truth data
gt_segmentations = PixelData(metainfo=img_meta)
gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))

# add and process property in SegDataSample
data_sample.gt_sem_seg = gt_segmentations
assert 'gt_sem_seg' in data_sample
assert 'data' in data_sample.gt_sem_seg
assert 'img_shape' in data_sample.gt_sem_seg.metainfo_keys()
print(data_sample.gt_sem_seg.shape)
'''
(4, 4)
'''
print(data_sample)
'''
<SegDataSample(

    META INFORMATION

    DATA FIELDS
    gt_sem_seg: <PixelData(

            META INFORMATION
            img_shape: (4, 4, 3)
            pad_shape: (4, 4, 3)

            DATA FIELDS
            data: tensor([[[1, 1, 1, 0],
                         [1, 0, 1, 1],
                         [1, 1, 1, 1],
                         [0, 1, 0, 1]]])
        ) at 0x1c2b4156460>
) at 0x1c2aae44d60>
'''

# delete and change property in SegDataSample
data_sample = SegDataSample()
gt_segmentations = PixelData(metainfo=img_meta)
gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
data_sample.gt_sem_seg = gt_segmentations
data_sample.gt_sem_seg.set_metainfo(dict(img_shape=(4, 4, 9), pad_shape=(4, 4, 9)))
del data_sample.gt_sem_seg.img_shape

# tensor-like operations
data_sample = SegDataSample()
gt_segmentations = PixelData(metainfo=img_meta)
gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
cuda_gt_segmentations = gt_segmentations.cuda()
cuda_gt_segmentations = gt_segmentations.to('cuda:0')
cpu_gt_segmentations = cuda_gt_segmentations.cpu()
cpu_gt_segmentations = cuda_gt_segmentations.to('cpu')
```

## Customize New Property in SegDataSample

If you want to customize a new property in `SegDataSample`, you may follow the [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) implementation below:

```python
class SegDataSample(BaseDataElement):
    ...

    @property
    def xxx_property(self) -> xxxData:
        return self._xxx_property

    @xxx_property.setter
    def xxx_property(self, value: xxxData) -> None:
        self.set_field(value, '_xxx_property', dtype=xxxData)

    @xxx_property.deleter
    def xxx_property(self) -> None:
        del self._xxx_property
```

Then a new property would be added to `SegDataSample`.
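
Assuming such a property has been defined, it can be used like the built-in fields (here `xxxData` stands in for a concrete data element type such as `PixelData`; the snippet is a sketch, not runnable as-is):

```python
data_sample = SegDataSample()
# set the new property; set_field enforces the declared dtype
data_sample.xxx_property = xxxData(data=torch.rand(1, 4, 4))
assert 'xxx_property' in data_sample
del data_sample.xxx_property  # triggers the deleter
```
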
@ -30,7 +30,7 @@ optimizers
   :members:

mmseg.evaluation
--------------
-----------------

metrics
^^^^^^^^^^
@ -75,18 +75,13 @@ necks
.. automodule:: mmseg.models.necks
   :members:

mmseg.ops
--------------
.. automodule:: mmseg.ops
   :members:

mmseg.registry
--------------
.. automodule:: mmseg.registry
   :members:

mmseg.structures
--------------
-----------------

structures
^^^^^^^^^^
@ -104,6 +99,6 @@ mmseg.utils
   :members:

mmseg.visualization
--------------
----------------------
.. automodule:: mmseg.visualization
   :members:

@ -28,7 +28,7 @@ version_file = '../../mmseg/version.py'


def get_version():
    with open(version_file, 'r') as f:
    with open(version_file) as f:
        exec(compile(f.read(), version_file, 'exec'))
    return locals()['__version__']
@ -42,8 +42,8 @@ We recommend that users follow our best practices to install MMSegmentation. How
|
||||
|
||||
```shell
|
||||
pip install -U openmim
|
||||
mim install 'mmcv>=2.0.0rc1'
|
||||
mim install mmengine
|
||||
mim install "mmcv>=2.0.0rc1"
|
||||
```
|
||||
|
||||
**Step 1.** Install MMSegmentation.
|
||||
@ -51,10 +51,8 @@ mim install mmengine
|
||||
Case a: If you develop and run mmseg directly, install it from source:
|
||||
|
||||
```shell
|
||||
git clone https://github.com/open-mmlab/mmsegmentation.git
|
||||
git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git
|
||||
cd mmsegmentation
|
||||
git checkout dev-1.x
|
||||
# branch 'dev-1.x' set up to track remote branch 'dev-1.x' from 'origin'.
|
||||
pip install -v -e .
|
||||
# '-v' means verbose, or more output
|
||||
# '-e' means installing a project in editable mode,
|
||||
@ -64,7 +62,7 @@ pip install -v -e .
|
||||
Case b: If you use mmsegmentation as a dependency or third-party package, install it with pip:
|
||||
|
||||
```shell
|
||||
pip install 'mmsegmentation>=1.0.0rc0'
|
||||
pip install "mmsegmentation>=1.0.0rc0"
|
||||
```
|
||||
|
||||
### Verify the installation
|
||||
@ -159,8 +157,8 @@ thus we only need to install MMCV and MMSegmentation with the following commands
|
||||
|
||||
```shell
|
||||
!pip3 install openmim
|
||||
!mim install 'mmcv>=2.0.0rc1'
|
||||
!mim install mmengine
|
||||
!mim install "mmcv>=2.0.0rc1"
|
||||
```
|
||||
|
||||
**Step 2.** Install MMSegmentation from the source.
|
||||
|
@ -24,7 +24,7 @@ Welcome to MMSegmentation's documentation!
|
||||
:maxdepth: 1
|
||||
:caption: Migration
|
||||
|
||||
migration.md
|
||||
migration/index.rst
|
||||
|
||||
.. toctree::
|
||||
:caption: API Reference
|
||||
|
@ -1 +0,0 @@
|
||||
# Migration from MMSegmentation 0.x
|
8
docs/en/migration/index.rst
Normal file
@ -0,0 +1,8 @@
|
||||
Migration
|
||||
***************
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
interface.md
|
||||
package.md
|
441
docs/en/migration/interface.md
Normal file
@ -0,0 +1,441 @@
|
||||
# Migration from MMSegmentation 0.x
|
||||
|
||||
## Introduction
|
||||
|
||||
This guide describes the fundamental differences between MMSegmentation 0.x and MMSegmentation 1.x in terms of behaviors and the APIs, and how these all relate to your migration journey.
|
||||
|
||||
## New dependencies
|
||||
|
||||
MMSegmentation 1.x depends on some new packages. You can prepare a new clean environment and install them again according to the [installation tutorial](get_started.md),
or install the packages below manually.

1. [MMEngine](https://github.com/open-mmlab/mmengine): MMEngine is the core of the OpenMMLab 2.0 architecture, and many components unrelated to computer vision were split out of MMCV into MMEngine.

2. [MMCV](https://github.com/open-mmlab/mmcv): The computer vision package of OpenMMLab. This is not a new dependency, but you need to upgrade it to version **2.0.0rc1** or above.

3. [MMClassification](https://github.com/open-mmlab/mmclassification) (optional): The image classification toolbox and benchmark of OpenMMLab. This is not a new dependency, but you need to upgrade it to version **1.0.0rc0** or above.
|
||||
|
||||
## Train launch
|
||||
|
||||
The main improvement of OpenMMLab 2.0 is the release of MMEngine, which provides a universal and powerful runner with unified interfaces to launch training jobs.

Compared with MMSeg 0.x, MMSeg 1.x provides fewer command line arguments in `tools/train.py`:
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Function</td>
|
||||
<td>Original</td>
|
||||
<td>New</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Loading pre-trained checkpoint</td>
|
||||
<td>--load_from=$CHECKPOINT</td>
|
||||
<td>--cfg-options load_from=$CHECKPOINT</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Resuming Train from specific checkpoint</td>
|
||||
<td>--resume-from=$CHECKPOINT</td>
|
||||
<td>--resume=$CHECKPOINT</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Resuming Train from the latest checkpoint</td>
|
||||
<td>--auto-resume</td>
|
||||
<td>--resume='auto'</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Whether not to evaluate the checkpoint during training</td>
|
||||
<td>--no-validate</td>
|
||||
<td>--cfg-options val_cfg=None val_dataloader=None val_evaluator=None</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Training device assignment</td>
|
||||
<td>--gpu-id=$DEVICE_ID</td>
|
||||
<td>-</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Whether or not set different seeds for different ranks</td>
|
||||
<td>--diff-seed</td>
|
||||
<td>--cfg-options randomness.diff_rank_seed=True</td>
|
||||
</tr>
|
||||
<td>Whether to set deterministic options for CUDNN backend</td>
|
||||
<td>--deterministic</td>
|
||||
<td>--cfg-options randomness.deterministic=True</td>
|
||||
</table>
|
||||
|
||||
## Configuration file
|
||||
|
||||
### Model settings
|
||||
|
||||
No changes in `model.backbone`, `model.neck`, `model.decode_head` and `model.losses` fields.
|
||||
|
||||
Add `model.data_preprocessor` field to configure the `DataPreProcessor`, including:
|
||||
|
||||
- `mean`(Sequence, optional): The pixel mean of R, G, B channels. Defaults to None.
|
||||
|
||||
- `std`(Sequence, optional): The pixel standard deviation of R, G, B channels. Defaults to None.
|
||||
|
||||
- `size`(Sequence, optional): Fixed padding size.
|
||||
|
||||
- `size_divisor` (int, optional): The divisor of padded size.
|
||||
|
||||
- `seg_pad_val` (float, optional): Padding value of segmentation map. Default: 255.
|
||||
|
||||
- `padding_mode` (str): Type of padding. Default: 'constant'.
|
||||
|
||||
- constant: pads with a constant value, this value is specified with pad_val.
|
||||
|
||||
- `bgr_to_rgb` (bool): whether to convert the image from BGR to RGB. Defaults to False.

- `rgb_to_bgr` (bool): whether to convert the image from RGB to BGR. Defaults to False.
|
||||
|
||||
**Note:**
Please refer to the [models documentation](../advanced_guides/models.md) for more details.
|
||||
|
||||
### Dataset settings
|
||||
|
||||
Changes in **data**:
|
||||
|
||||
The original `data` field is split into `train_dataloader`, `val_dataloader` and `test_dataloader`. This allows us to configure them in a fine-grained way. For example, you can specify different samplers and batch sizes during training and test.
The `samples_per_gpu` is renamed to `batch_size`.
The `workers_per_gpu` is renamed to `num_workers`.
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Original</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
data = dict(
|
||||
samples_per_gpu=4,
|
||||
workers_per_gpu=4,
|
||||
train=dict(...),
|
||||
val=dict(...),
|
||||
test=dict(...),
|
||||
)
|
||||
```
|
||||
|
||||
</td>
|
||||
<tr>
|
||||
<td>New</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
dataset=dict(...),
|
||||
sampler=dict(type='DefaultSampler', shuffle=True) # necessary
|
||||
)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
dataset=dict(...),
|
||||
sampler=dict(type='DefaultSampler', shuffle=False) # necessary
|
||||
)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
Changes in **pipeline**
|
||||
|
||||
- The original formatting transforms **`ToTensor`**, **`ImageToTensor`** and **`Collect`** are combined into [`PackSegInputs`](mmseg.datasets.transforms.PackSegInputs).
- We don't recommend doing **`Normalize`** and **`Pad`** in the dataset pipeline. Please remove them from your pipelines and set them in the `data_preprocessor` field instead.
- The original **`Resize`** has been changed to **`RandomResize`** in MMSeg 1.x; the input argument `img_scale` is renamed to `scale`, and the default value of `keep_ratio` is changed to False.
|
||||
|
||||
**Note:**
We moved some work of the data transforms, like normalization, to the data preprocessor; see [the documentation](package.md) for more details.
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Original</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
|
||||
]
|
||||
```
|
||||
|
||||
</td>
|
||||
<tr>
|
||||
<td>New</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2560, 640),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
Changes in **`evaluation`**:
|
||||
|
||||
- The **`evaluation`** field is split into `val_evaluator` and `test_evaluator`, and it no longer supports the `interval` and `save_best` arguments.
  The `interval` is moved to `train_cfg.val_interval`, and the `save_best`
  is moved to `default_hooks.checkpoint.save_best`. `pre_eval` has been removed.
- `'mIoU'` has been changed to `'IoUMetric'`.
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Original</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
|
||||
```
|
||||
|
||||
</td>
|
||||
<tr>
|
||||
<td>New</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
### Optimizer and Schedule settings
|
||||
|
||||
Changes in **`optimizer`** and **`optimizer_config`**:
|
||||
|
||||
- Now we use the `optim_wrapper` field to specify all configurations of the optimization process, and the
  `optimizer` is a sub field of `optim_wrapper`.
- `paramwise_cfg` is also a sub field of `optim_wrapper`, instead of `optimizer`.
- `optimizer_config` is removed, and all of its configurations are moved to `optim_wrapper`.
|
||||
- `grad_clip` is renamed to `clip_grad`.
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Original</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.0005)
|
||||
optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
|
||||
```
|
||||
|
||||
</td>
|
||||
<tr>
|
||||
<td>New</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
optim_wrapper = dict(
|
||||
type='OptimWrapper',
|
||||
optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005),
|
||||
clip_grad=dict(max_norm=1, norm_type=2))
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
Changes in **`lr_config`**:
|
||||
|
||||
- The `lr_config` field is removed and we use the new `param_scheduler` to replace it.
- The `warmup` related arguments are removed, since we use a combination of schedulers to implement this
  functionality.

The new scheduler combination mechanism is very flexible, and you can use it to design many kinds of learning
rate / momentum curves. See [the tutorial](TODO) for more details.
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Original</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
lr_config = dict(
|
||||
policy='poly',
|
||||
warmup='linear',
|
||||
warmup_iters=1500,
|
||||
warmup_ratio=1e-6,
|
||||
power=1.0,
|
||||
min_lr=0.0,
|
||||
by_epoch=False)
|
||||
```
|
||||
|
||||
</td>
|
||||
<tr>
|
||||
<td>New</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
|
||||
dict(
|
||||
type='PolyLR',
|
||||
power=1.0,
|
||||
begin=1500,
|
||||
end=160000,
|
||||
eta_min=0.0,
|
||||
by_epoch=False,
|
||||
)
|
||||
]
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
Changes in **`runner`**:
|
||||
|
||||
Most configuration in the original `runner` field is moved to `train_cfg`, `val_cfg` and `test_cfg`, which
|
||||
configure the loop in training, validation and test.
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Original</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
runner = dict(type='IterBasedRunner', max_iters=20000)
|
||||
```
|
||||
|
||||
</td>
|
||||
<tr>
|
||||
<td>New</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
# The `val_interval` is the original `evaluation.interval`.
|
||||
train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
|
||||
val_cfg = dict(type='ValLoop') # Use the default validation loop.
|
||||
test_cfg = dict(type='TestLoop') # Use the default test loop.
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
In fact, in OpenMMLab 2.0 we introduced `Loop` to control the behaviors in training, validation and test. The functionalities of `Runner` have also changed. You can find more details in the [runner tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md)
of [MMEngine](https://github.com/open-mmlab/mmengine/).
|
||||
|
||||
### Runtime settings
|
||||
|
||||
Changes in **`checkpoint_config`** and **`log_config`**:
|
||||
|
||||
The `checkpoint_config` is moved to `default_hooks.checkpoint` and the `log_config` is moved to `default_hooks.logger`.
And we move many hooks settings from the script code to the `default_hooks` field in the runtime configuration.
|
||||
|
||||
```python
|
||||
default_hooks = dict(
|
||||
# record the time of every iterations.
|
||||
timer=dict(type='IterTimerHook'),
|
||||
|
||||
# print log every 50 iterations.
|
||||
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
|
||||
|
||||
# enable the parameter scheduler.
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
|
||||
# save checkpoint every 2000 iterations.
|
||||
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000),
|
||||
|
||||
# set sampler seed in distributed environment.
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
|
||||
# validation results visualization.
|
||||
visualization=dict(type='SegVisualizationHook'))
|
||||
```
|
||||
|
||||
In addition, we split the original logger into logger and visualizer. The logger is used to record
information and the visualizer is used to display it in different backends, like the terminal and TensorBoard.
|
||||
|
||||
<table class="docutils">
|
||||
<tr>
|
||||
<td>Original</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
log_config = dict(
|
||||
interval=100,
|
||||
hooks=[
|
||||
dict(type='TextLoggerHook'),
|
||||
dict(type='TensorboardLoggerHook'),
|
||||
])
|
||||
```
|
||||
|
||||
</td>
|
||||
<tr>
|
||||
<td>New</td>
|
||||
<td>
|
||||
|
||||
```python
|
||||
default_hooks = dict(
|
||||
...
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
)
|
||||
vis_backends = [dict(type='LocalVisBackend'),
|
||||
dict(type='TensorboardVisBackend')]
|
||||
visualizer = dict(
|
||||
type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
```
|
||||
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
Changes in **`load_from`** and **`resume_from`**:
|
||||
|
||||
- The `resume_from` is removed. And we use `resume` and `load_from` to replace it.
|
||||
- If `resume=True` and `load_from` is **not None**, resume training from the checkpoint in `load_from`.
|
||||
- If `resume=True` and `load_from` is **None**, try to resume from the latest checkpoint in the work directory.
|
||||
- If `resume=False` and `load_from` is **not None**, only load the checkpoint, not resume training.
|
||||
- If `resume=False` and `load_from` is **None**, do not load nor resume.
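
For example, to resume from the latest checkpoint in the work directory, the two fields combine as follows:

```python
# in the config file
resume = True
load_from = None  # let the runner search work_dir for the latest checkpoint
```
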
|
||||
|
||||
Changes in **`dist_params`**: The `dist_params` field is a sub field of `env_cfg` now. And there are some new
|
||||
configurations in the `env_cfg`.
|
||||
|
||||
```python
|
||||
env_cfg = dict(
|
||||
# whether to enable cudnn benchmark
|
||||
cudnn_benchmark=False,
|
||||
|
||||
# set multi process parameters
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
|
||||
# set distributed parameters
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
```
|
||||
|
||||
Changes in **`workflow`**: `workflow` related functionalities are removed.
|
||||
|
||||
New field **`visualizer`**: The visualizer is a new design in the OpenMMLab 2.0 architecture. We use a
visualizer instance in the runner to handle result and log visualization, and to save them to different backends.
See the [visualization tutorial](user_guides/visualization.md) for more details.
|
||||
|
||||
New field **`default_scope`**: The start point to search module for all registries. The `default_scope` in MMSegmentation is `mmseg`. See [the registry tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/registry.md) for more details.
|
114
docs/en/migration/package.md
Normal file
@ -0,0 +1,114 @@
|
||||
# Package structures changes
|
||||
|
||||
This section is included if you are curious about what has changed between MMSeg 0.x and 1.x.
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td>MMSegmentation 0.x</td>
|
||||
<td>MMSegmentation 1.x</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mmseg.api</td>
|
||||
<td>mmseg.api</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor=#fcf7f7>- mmseg.core</td>
|
||||
<td bgcolor=#ecf4eb>+ mmseg.engine</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mmseg.datasets</td>
|
||||
<td>mmseg.datasets</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mmseg.models</td>
|
||||
<td>mmseg.models</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td bgcolor=#fcf7f7>- mmseg.ops</td>
|
||||
<td bgcolor=#ecf4eb>+ mmseg.structures</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mmseg.utils</td>
|
||||
<td>mmseg.utils</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td bgcolor=#ecf4eb>+ mmseg.evaluation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td bgcolor=#ecf4eb>+ mmseg.registry</td>
|
||||
<tr>
|
||||
</table>
|
||||
|
||||
## Removed packages
|
||||
|
||||
### `mmseg.core`
|
||||
|
||||
In OpenMMLab 2.0, the `core` package has been removed. The `hooks` and `optimizers` of `core` were moved to `mmseg.engine`, and the `evaluation` of `core` is now `mmseg.evaluation`.
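
A quick sketch of how an import moves under this change (the 0.x path is illustrative):

```python
# MMSeg 0.x (illustrative):
# from mmseg.core.evaluation import ...

# MMSeg 1.x: evaluation metrics now live in mmseg.evaluation
from mmseg.evaluation import IoUMetric
```
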
|
||||
|
||||
### `mmseg.ops`

The `ops` package included `encoding` and `wrappers`, which were moved to `mmseg.models.utils`.
|
||||
|
||||
## Added packages
|
||||
|
||||
### `mmseg.engine`
|
||||
|
||||
OpenMMLab 2.0 adds a new foundational library for training deep learning models, MMEngine. It serves as the training engine of all OpenMMLab codebases.
The `engine` package of mmseg contains customized modules for the semantic segmentation task, like `SegVisualizationHook`, which works for visualizing segmentation masks.
|
||||
|
||||
### `mmseg.structures`

In OpenMMLab 2.0, we designed data structures for computer vision tasks, and in mmseg we implement `SegDataSample` in the `structures` package.
|
||||
|
||||
### `mmseg.evaluation`
|
||||
|
||||
We moved all evaluation metrics into `mmseg.evaluation`.
|
||||
|
||||
### `mmseg.registry`
|
||||
|
||||
We moved the registry implementations for all kinds of modules in MMSegmentation into `mmseg.registry`.
|
||||
|
||||
## Modified packages
|
||||
|
||||
### `mmseg.apis`
|
||||
|
||||
OpenMMLab 2.0 tries to support unified interfaces for multitasking in computer vision
and releases a much stronger [`Runner`](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md),
so MMSeg 1.x removed the modules in `train.py` and `test.py`, renamed `init_segmentor` to `init_model` and `inference_segmentor` to `inference_model`.
Here are the changes of `mmseg.apis`:
|
||||
|
||||
| Function | Changes |
|
||||
| :-------------------: | :---------------------------------------------- |
|
||||
| `init_segmentor` | Renamed to `init_model` |
|
||||
| `inference_segmentor` | Renamed to `inference_model`                    |
|
||||
| `show_result_pyplot` | Implemented based on `SegLocalVisualizer` |
|
||||
| `train_model` | Removed, use `runner.train` to train. |
|
||||
| `multi_gpu_test` | Removed, use `runner.test` to test. |
|
||||
| `single_gpu_test` | Removed, use `runner.test` to test. |
|
||||
| `set_random_seed` | Removed, use `mmengine.runner.set_random_seed`. |
|
||||
| `init_random_seed` | Removed, use `mmengine.dist.sync_random_seed`. |
|
||||
|
||||
### `mmseg.datasets`
|
||||
|
||||
OpenMMLab 2.0 defines `BaseDataset` to provide the functions and interfaces of datasets, and MMSegmentation 1.x also follows this protocol and defines `BaseSegDataset` inherited from `BaseDataset`. MMCV 2.x collects general data transforms for multiple tasks, e.g. classification, detection and segmentation, so MMSegmentation 1.x uses these data transforms and removes them from mmseg.datasets. An example dataset following the new protocol is sketched after the table below.
|
||||
|
||||
| Packages/Modules      | Changes                                                                                     |
| :-------------------: | :------------------------------------------------------------------------------------------ |
| `mmseg.pipelines`     | Renamed to `mmseg.transforms`                                                                |
| `mmseg.sampler`       | Moved to `mmengine.dataset.sampler`                                                          |
| `CustomDataset`       | Renamed to `BaseSegDataset` and inherited from `BaseDataset` in MMEngine                     |
| `DefaultFormatBundle` | Replaced with `PackSegInputs`                                                                |
| `LoadImageFromFile`   | Moved to `mmcv.transforms.LoadImageFromFile`                                                 |
| `LoadAnnotations`     | Moved to `mmcv.transforms.LoadAnnotations`                                                   |
| `Resize`              | Moved to `mmcv.transforms` and split into `Resize`, `RandomResize` and `RandomChoiceResize`  |
| `RandomFlip`          | Moved to `mmcv.transforms.RandomFlip`                                                        |
| `Pad`                 | Moved to `mmcv.transforms.Pad`                                                               |
| `Normalize`           | Moved to `mmcv.transforms.Normalize`                                                         |
| `Compose`             | Moved to `mmcv.transforms.Compose`                                                           |
| `ImageToTensor`       | Moved to `mmcv.transforms.ImageToTensor`                                                     |
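
To make the new protocol concrete, a minimal custom dataset under MMSeg 1.x might look like this (the class name, suffixes and metainfo below are illustrative assumptions):

```python
from mmseg.datasets import BaseSegDataset
from mmseg.registry import DATASETS


@DATASETS.register_module()
class ExampleDataset(BaseSegDataset):
    # classes and palette are illustrative placeholders
    METAINFO = dict(
        classes=('background', 'foreground'),
        palette=[[0, 0, 0], [255, 255, 255]])

    def __init__(self, **kwargs) -> None:
        super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)
```
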
|
||||
|
||||
### `mmseg.models`
|
||||
|
||||
`models` has not changed much; it just adds the `encoding` and `wrappers` from the former `mmseg.ops`.
|
@ -1,5 +1,54 @@
|
||||
# Changelog of v1.x
|
||||
|
||||
## v1.0.0rc1 (2/11/2022)
|
||||
|
||||
### Highlights
|
||||
|
||||
- Support PoolFormer ([#2191](https://github.com/open-mmlab/mmsegmentation/pull/2191))
|
||||
- Add Decathlon dataset ([#2227](https://github.com/open-mmlab/mmsegmentation/pull/2227))
|
||||
|
||||
### Features
|
||||
|
||||
- Add BioMedical data loading ([#2176](https://github.com/open-mmlab/mmsegmentation/pull/2176))
|
||||
- Add LIP dataset ([#2251](https://github.com/open-mmlab/mmsegmentation/pull/2251))
|
||||
- Add `GenerateEdge` data transform ([#2210](https://github.com/open-mmlab/mmsegmentation/pull/2210))
|
||||
|
||||
### Bug fix
|
||||
|
||||
- Fix segmenter-vit-s_fcn config ([#2037](https://github.com/open-mmlab/mmsegmentation/pull/2037))
|
||||
- Fix binary segmentation ([#2101](https://github.com/open-mmlab/mmsegmentation/pull/2101))
|
||||
- Fix MMSegmentation colab demo ([#2089](https://github.com/open-mmlab/mmsegmentation/pull/2089))
|
||||
- Fix ResizeToMultiple transform ([#2185](https://github.com/open-mmlab/mmsegmentation/pull/2185))
|
||||
- Use SyncBN in mobilenet_v2 ([#2198](https://github.com/open-mmlab/mmsegmentation/pull/2198))
|
||||
- Fix typo in installation ([#2175](https://github.com/open-mmlab/mmsegmentation/pull/2175))
|
||||
- Fix typo in visualization.md ([#2116](https://github.com/open-mmlab/mmsegmentation/pull/2116))
|
||||
|
||||
### Enhancement
|
||||
|
||||
- Add mim extras_requires in setup.py ([#2012](https://github.com/open-mmlab/mmsegmentation/pull/2012))
|
||||
- Fix CI ([#2029](https://github.com/open-mmlab/mmsegmentation/pull/2029))
|
||||
- Remove ops module ([#2063](https://github.com/open-mmlab/mmsegmentation/pull/2063))
|
||||
- Add pyupgrade pre-commit hook ([#2078](https://github.com/open-mmlab/mmsegmentation/pull/2078))
|
||||
- Add `out_file` in `add_datasample` of `SegLocalVisualizer` to directly save image ([#2090](https://github.com/open-mmlab/mmsegmentation/pull/2090))
|
||||
- Upgrade pre commit hooks ([#2154](https://github.com/open-mmlab/mmsegmentation/pull/2154))
|
||||
- Ignore test timm in CI when torch\<1.7 ([#2158](https://github.com/open-mmlab/mmsegmentation/pull/2158))
|
||||
- Update requirements ([#2186](https://github.com/open-mmlab/mmsegmentation/pull/2186))
|
||||
- Fix Windows platform CI ([#2202](https://github.com/open-mmlab/mmsegmentation/pull/2202))
|
||||
|
||||
### Documentation
|
||||
|
||||
- Add `Overview` documentation ([#2042](https://github.com/open-mmlab/mmsegmentation/pull/2042))
|
||||
- Add `Evaluation` documentation ([#2077](https://github.com/open-mmlab/mmsegmentation/pull/2077))
|
||||
- Add `Migration` documentation ([#2066](https://github.com/open-mmlab/mmsegmentation/pull/2066))
|
||||
- Add `Structures` documentation ([#2070](https://github.com/open-mmlab/mmsegmentation/pull/2070))
|
||||
- Add `Structures` ZN documentation ([#2129](https://github.com/open-mmlab/mmsegmentation/pull/2129))
|
||||
- Add `Engine` ZN documentation ([#2157](https://github.com/open-mmlab/mmsegmentation/pull/2157))
|
||||
- Update `Prepare datasets` and `Visualization` doc ([#2054](https://github.com/open-mmlab/mmsegmentation/pull/2054))
|
||||
- Update `Models` documentation ([#2160](https://github.com/open-mmlab/mmsegmentation/pull/2160))
|
||||
- Update `Add New Modules` documentation ([#2067](https://github.com/open-mmlab/mmsegmentation/pull/2067))
|
||||
- Fix the installation commands in get_started.md ([#2174](https://github.com/open-mmlab/mmsegmentation/pull/2174))
|
||||
- Add MMYOLO to README.md ([#2220](https://github.com/open-mmlab/mmsegmentation/pull/2220))
|
||||
|
||||
## v1.0.0rc0 (31/8/2022)
|
||||
|
||||
We are excited to announce the release of MMSegmentation 1.0.0rc0.
|
||||
|
@ -15,9 +15,9 @@
|
||||
|
||||
**New Features**
|
||||
|
||||
- Support MAE: Masked Autoencoders Are Scalable Vision Learners ([1307](https://github.com/open-mmlab/mmsegmentation/pull/1307), [1523](https://github.com/open-mmlab/mmsegmentation/pull/1523))
|
||||
- Support Resnet strikes back ([1390](https://github.com/open-mmlab/mmsegmentation/pull/1390))
|
||||
- Support extra dataloader settings in configs ([1435](https://github.com/open-mmlab/mmsegmentation/pull/1435))
|
||||
- Support MAE: Masked Autoencoders Are Scalable Vision Learners ([#1307](https://github.com/open-mmlab/mmsegmentation/pull/1307), [#1523](https://github.com/open-mmlab/mmsegmentation/pull/1523))
|
||||
- Support Resnet strikes back ([#1390](https://github.com/open-mmlab/mmsegmentation/pull/1390))
|
||||
- Support extra dataloader settings in configs ([#1435](https://github.com/open-mmlab/mmsegmentation/pull/1435))
|
||||
|
||||
**Bug Fixes**
|
||||
|
||||
|
@ -8,7 +8,8 @@ The compatible MMSegmentation and MMCV versions are as below. Please install the
|
||||
|
||||
| MMSegmentation version | MMCV version | MMClassification version |
|
||||
| :--------------------: | :-------------------------: | :----------------------: |
|
||||
| 1.0.0rc0 | mmcv-full >= 2.0.0rc1 | mmcls>=1.0.0rc0 |
|
||||
| 1.0.0rc1 | mmcv >= 2.0.0rc1 | mmcls>=1.0.0rc0 |
|
||||
| 1.0.0rc0 | mmcv >= 2.0.0rc1 | mmcls>=1.0.0rc0 |
|
||||
| master | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 |
|
||||
| 0.24.1 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 |
|
||||
| 0.23.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 |
|
||||
|
@ -1 +1,85 @@
|
||||
# Overview
|
||||
|
||||
This chapter introduces you to the framework of MMSegmentation, and the basic concepts of semantic segmentation. It also provides links to detailed tutorials about MMSegmentation.
|
||||
|
||||
## What is semantic segmentation?
|
||||
|
||||
Semantic segmentation is the task of clustering parts of an image together that belong to the same object class.
|
||||
It is a form of pixel-level prediction because each pixel in an image is classified according to a category.
|
||||
Some example benchmarks for this task are [Cityscapes](https://www.cityscapes-dataset.com/benchmarks/), [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/) and [ADE20K](https://groups.csail.mit.edu/vision/datasets/ADE20K/).
|
||||
Models are usually evaluated with the Mean Intersection-Over-Union (Mean IoU) and Pixel Accuracy metrics.
|
||||
|
||||
## What is MMSegmentation?
|
||||
|
||||
MMSegmentation is a toolbox that provides a framework for the unified implementation and evaluation of semantic segmentation methods,
and contains high-quality implementations of popular semantic segmentation methods and datasets.
|
||||
|
||||
MMSeg consists of 7 main parts including apis, structures, datasets, models, engine, evaluation and visualization.
|
||||
|
||||
- **apis** provides high-level APIs for model inference.
|
||||
|
||||
- **structures** provides segmentation data structure `SegDataSample`.
|
||||
|
||||
- **datasets** supports various datasets for semantic segmentation.
|
||||
|
||||
- **transforms** contains a lot of useful data augmentation transforms.
|
||||
|
||||
- **models** is the most vital part for segmentors and contains different components of a segmentor.
|
||||
|
||||
- **segmentors** defines all of the segmentation model classes.
|
||||
- **data_preprocessors** works for preprocessing the input data of the model.
|
||||
- **backbones** contains various backbone networks that transform an image to feature maps.
|
||||
- **necks** contains various neck components that connect the backbone and heads.
|
||||
- **decode_heads** contains various head components that take feature map as input and predict segmentation results.
|
||||
- **losses** contains various loss functions.
|
||||
|
||||
- **engine** is a part for runtime components that extends function of [MMEngine](https://github.com/open-mmlab/mmengine).
|
||||
|
||||
- **optimizers** provides optimizers and optimizer wrappers.
|
||||
- **hooks** provides various hooks of the runner.
|
||||
|
||||
- **evaluation** provides different metrics for evaluating model performance.
|
||||
|
||||
- **visualization** is for visualizing segmentation results.
|
||||
|
||||
## How to use this documentation
|
||||
|
||||
Here is a detailed step-by-step guide to learn more about MMSegmentation:
|
||||
|
||||
1. For installation instructions, please see [get_started](get_started.md).
|
||||
|
||||
2. For beginners, MMSegmentation is the best place to start the journey of semantic segmentation,
   as there are many SOTA and classic segmentation [models](model_zoo.md),
   and it is easy to carry out a segmentation task by plugging together building blocks and convenient high-level APIs.
   Refer to the tutorials below for the basic usage of MMSegmentation:
||||
|
||||
- [Config](user_guides/1_config.md)
|
||||
- [Dataset Preparation](user_guides/2_dataset_prepare.md)
|
||||
- [Inference](user_guides/3_inference.md)
|
||||
- [Train and Test](user_guides/4_train_test.md)
|
||||
|
||||
3. If you would like to learn about the fundamental classes and features that make MMSegmentation work,
|
||||
please refer to the tutorials below to dive deeper:
|
||||
|
||||
- [Data flow](advanced_guides/data_flow.md)
|
||||
- [Structures](advanced_guides/structures.md)
|
||||
- [Models](advanced_guides/models.md)
|
||||
- [Datasets](advanced_guides/datasets.md)
|
||||
- [Evaluation](advanced_guides/evaluation.md)
|
||||
|
||||
4. MMSegmentation also provides tutorials for customization and advanced research;
   please refer to the guides below to build your own segmentation project:
||||
|
||||
- [Add new models](advanced_guides/add_models.md)
|
||||
- [Add new datasets](advanced_guides/add_dataset.md)
|
||||
- [Add new transforms](advanced_guides/add_transform.md)
|
||||
- [Customize runtime](advanced_guides/customize_runtime.md)
|
||||
|
||||
5. If you are more familiar with MMSegmentation v0.x, there is documentation about migrating from MMSegmentation v0.x to v1.x:
|
||||
|
||||
- [migration](migration/index.rst)
|
||||
|
||||
## References
|
||||
|
||||
- https://paperswithcode.com/task/semantic-segmentation/codeless#task-home
|
||||
|
@ -18,13 +18,15 @@ num_ckpts = 0
|
||||
for f in files:
|
||||
url = osp.dirname(f.replace('../../', url_prefix))
|
||||
|
||||
with open(f, 'r') as content_file:
|
||||
with open(f) as content_file:
|
||||
content = content_file.read()
|
||||
|
||||
title = content.split('\n')[0].replace('#', '').strip()
|
||||
ckpts = set(x.lower().strip()
|
||||
for x in re.findall(r'https?://download.*\.pth', content)
|
||||
if 'mmsegmentation' in x)
|
||||
ckpts = {
|
||||
x.lower().strip()
|
||||
for x in re.findall(r'https?://download.*\.pth', content)
|
||||
if 'mmsegmentation' in x
|
||||
}
|
||||
if len(ckpts) == 0:
|
||||
continue
|
||||
|
||||
@ -34,7 +36,7 @@ for f in files:
|
||||
assert len(_papertype) > 0
|
||||
papertype = _papertype[0]
|
||||
|
||||
paper = set([(papertype, title)])
|
||||
paper = {(papertype, title)}
|
||||
|
||||
titles.append(title)
|
||||
num_ckpts += len(ckpts)
|
||||
|
@ -112,7 +112,7 @@ model = dict(
|
||||
loss_weight=0.4)), # Loss weight of auxiliary_head.
|
||||
# model training and testing settings
|
||||
train_cfg=dict(), # train_cfg is just a place holder for now.
|
||||
test_cfg=dict(mode='whole')) # The test mode, options are 'whole' and 'sliding'. 'whole': whole image fully-convolutional test. 'sliding': sliding crop window on the image.
|
||||
test_cfg=dict(mode='whole')) # The test mode, options are 'whole' and 'slide'. 'whole': whole image fully-convolutional test. 'slide': sliding crop window on the image.
|
||||
```
|
||||
|
||||
`_base_/datasets/cityscapes.py` is the configuration file of the dataset
|
||||
|
@ -145,7 +145,7 @@ mmsegmentation
|
||||
The data could be found [here](https://www.cityscapes-dataset.com/downloads/) after registration.
|
||||
|
||||
By convention, `**labelTrainIds.png` are used for cityscapes training.
We provide a [script](https://github.com/open-mmlab/mmsegmentation/blob/1.x/tools/dataset_converters/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts)
to generate `**labelTrainIds.png`.
|
||||
|
||||
```shell
|
||||
@ -351,7 +351,8 @@ The dataset is a Large-scale Dataset for Instance Segmentation (also have segman
|
||||
|
||||
You may need to follow the following structure for dataset preparation after downloading iSAID dataset.
|
||||
|
||||
```
|
||||
```none
|
||||
├── data
|
||||
│ ├── iSAID
|
||||
│ │ ├── train
|
||||
│ │ │ ├── images
|
||||
@ -376,3 +377,40 @@ python tools/dataset_converters/isaid.py /path/to/iSAID
|
||||
```
|
||||
|
||||
In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation.
|
||||
|
||||
## LIP (Look Into Person) dataset

This dataset can be downloaded from [this page](https://lip.sysuhcp.com/overview.php).

Please run the following commands to unzip the dataset.
|
||||
|
||||
```shell
|
||||
unzip LIP.zip
|
||||
cd LIP
|
||||
unzip TrainVal_images.zip
|
||||
unzip TrainVal_parsing_annotations.zip
|
||||
cd TrainVal_parsing_annotations
|
||||
unzip TrainVal_parsing_annotations.zip
|
||||
mv train_segmentations ../
|
||||
mv val_segmentations ../
|
||||
cd ..
|
||||
```
|
||||
|
||||
The contents of LIP datasets include:
|
||||
|
||||
```none
|
||||
├── data
|
||||
│ ├── LIP
|
||||
│ │ ├── train_images
|
||||
│ │ │ ├── 1000_1234574.jpg
|
||||
│ │ │ ├── ...
|
||||
│ │ ├── train_segmentations
|
||||
│ │ │ ├── 1000_1234574.png
|
||||
│ │ │ ├── ...
|
||||
│ │ ├── val_images
|
||||
│ │ │ ├── 100034_483681.jpg
|
||||
│ │ │ ├── ...
|
||||
│ │ ├── val_segmentations
|
||||
│ │ │ ├── 100034_483681.png
|
||||
│ │ │ ├── ...
|
||||
```
|
||||
|
@ -38,7 +38,7 @@ Find the `vis_data` path of `work_dir` after starting training, for example, the
|
||||
work_dirs/test_visual/20220810_115248/vis_data
|
||||
```
|
||||
|
||||
The scalar file in the vis_data path includes the learning rate, losses, data_time, etc. It also records the metric results, and you can refer to the [logging tutorial](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/logging.html) in MMEngine to log custom data. The TensorBoard visualization results are executed with the following command:
|
||||
|
||||
```shell
|
||||
tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data
|
||||
@ -46,9 +46,11 @@ tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data
|
||||
|
||||
## Data and Results visualization
|
||||
|
||||
MMSegmentation provides `SegVisualizationHook` that can render segmentation masks of ground truth and prediction. Users can modify `default_hooks` at each `schedule_x.py` config file.
|
||||
### Visualize Data Samples during Model Testing or Validation
|
||||
|
||||
For exsample, In `_base_/schedules/schedule_20k.py`, modify the `SegVisualizationHook` configuration, set `draw` to `True` to enable the storage of network inference results, `interval` indicates the sampling interval of the prediction results, and when set to 1, each inference result of the network will be saved. `interval` is set to 50 by default:
|
||||
MMSegmentation provides `SegVisualizationHook` which is a [hook](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/hook.md) working to visualize ground truth and prediction of segmentation during model testing and evaluation. Its configuration is in `default_hooks`, please see [Runner tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/runner.md) for more details.
|
||||
|
||||
For example, in `_base_/schedules/schedule_20k.py`, modify the `SegVisualizationHook` configuration and set `draw` to `True` to enable the storage of network inference results. `interval` indicates the sampling interval of the prediction results; when it is set to 1, every inference result of the network will be saved. `interval` is set to 50 by default:
|
||||
|
||||
```python
|
||||
default_hooks = dict(
|
||||
@ -76,4 +78,97 @@ we can also run the following command to view them in TensorBoard:
|
||||
tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data
|
||||
```
|
||||
|
||||
If you would like to know more visualization usage, you can refer to [visualization tutorial](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/visualization.html) in mmengie.
|
||||
### Visualize a Single Data Sample
|
||||
|
||||
If you want to visualize a single data sample, we suggest using `SegLocalVisualizer`.
|
||||
|
||||
`SegLocalVisualizer` is a child class inherited from `Visualizer` in MMEngine that works for MMSegmentation visualization. For more details about `Visualizer`, please refer to the [visualization tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/visualization.md) in MMEngine.
|
||||
|
||||
Here is an example of `SegLocalVisualizer`. First, you may download the example data below with the following commands:
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/24582831/189833109-eddad58f-f777-4fc0-b98a-6bd429143b06.png" width="70%"/>
|
||||
</div>
|
||||
|
||||
```shell
|
||||
wget https://user-images.githubusercontent.com/24582831/189833109-eddad58f-f777-4fc0-b98a-6bd429143b06.png --output-document aachen_000000_000019_leftImg8bit.png
|
||||
wget https://user-images.githubusercontent.com/24582831/189833143-15f60f8a-4d1e-4cbb-a6e7-5e2233869fac.png --output-document aachen_000000_000019_gtFine_labelTrainIds.png
|
||||
```
|
||||
|
||||
Then you can find their local path and use the scripts below to visualize:
|
||||
|
||||
```python
|
||||
import mmcv
|
||||
import os.path as osp
|
||||
import torch
|
||||
# `PixelData` is the data structure for pixel-level annotations or predictions defined in MMEngine.
|
||||
# Please refer to the tutorial file on data structures in MMEngine below:
|
||||
# https://github.com/open-mmlab/mmengine/tree/main/docs/en/advanced_tutorials/data_element.md
|
||||
|
||||
from mmengine.structures import PixelData
|
||||
|
||||
# `SegDataSample` is the data structure interface between different components
|
||||
# defined in MMSegmentation; it includes the ground truth, prediction and
|
||||
# predicted logits of semantic segmentation.
|
||||
# Please refer to the tutorial file of `SegDataSample` below for more details:
|
||||
# https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/advanced_guides/structures.md
|
||||
|
||||
from mmseg.structures import SegDataSample
|
||||
from mmseg.visualization import SegLocalVisualizer
|
||||
|
||||
out_file = 'out_file_cityscapes'
|
||||
save_dir = './work_dirs'
|
||||
|
||||
image = mmcv.imread(
|
||||
osp.join(
|
||||
osp.dirname(__file__),
|
||||
'./aachen_000000_000019_leftImg8bit.png'
|
||||
),
|
||||
'color')
|
||||
sem_seg = mmcv.imread(
|
||||
osp.join(
|
||||
osp.dirname(__file__),
|
||||
'./aachen_000000_000019_gtFine_labelTrainIds.png' # noqa
|
||||
),
|
||||
'unchanged')
|
||||
sem_seg = torch.from_numpy(sem_seg)
|
||||
gt_sem_seg_data = dict(data=sem_seg)
|
||||
gt_sem_seg = PixelData(**gt_sem_seg_data)
|
||||
data_sample = SegDataSample()
|
||||
data_sample.gt_sem_seg = gt_sem_seg
|
||||
|
||||
seg_local_visualizer = SegLocalVisualizer(
|
||||
vis_backends=[dict(type='LocalVisBackend')],
|
||||
save_dir=save_dir)
|
||||
|
||||
# The meta information of the dataset usually includes `classes` for class names and
|
||||
# `palette` for the visualization color of each foreground class.
|
||||
# All class names and palettes are defined in the file:
|
||||
# https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/utils/class_names.py
|
||||
|
||||
seg_local_visualizer.dataset_meta = dict(
|
||||
classes=('road', 'sidewalk', 'building', 'wall', 'fence',
|
||||
'pole', 'traffic light', 'traffic sign',
|
||||
'vegetation', 'terrain', 'sky', 'person', 'rider',
|
||||
'car', 'truck', 'bus', 'train', 'motorcycle',
|
||||
'bicycle'),
|
||||
palette=[[128, 64, 128], [244, 35, 232], [70, 70, 70],
|
||||
[102, 102, 156], [190, 153, 153], [153, 153, 153],
|
||||
[250, 170, 30], [220, 220, 0], [107, 142, 35],
|
||||
[152, 251, 152], [70, 130, 180], [220, 20, 60],
|
||||
[255, 0, 0], [0, 0, 142], [0, 0, 70],
|
||||
[0, 60, 100], [0, 80, 100], [0, 0, 230],
|
||||
[119, 11, 32]])
|
||||
# When `show=True`, the results are shown directly;
|
||||
# when `show=False`, the results are saved to the local directory.
|
||||
seg_local_visualizer.add_datasample(out_file, image,
|
||||
data_sample, show=False)
|
||||
```
|
||||
|
||||
Then the visualization result of the image with its corresponding ground truth can be found in `./work_dirs/vis_data/vis_image/`, saved as `out_file_cityscapes_0.png`:
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/24582831/189835713-c0534054-4bfa-4b75-9254-0afbeb5ff02e.png" width="70%"/>
|
||||
</div>
|
||||
|
||||
If you would like to know more visualization usage, you can refer to [visualization tutorial](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/visualization.html) in MMEngine.
|
||||
|
@ -1 +1,163 @@
|
||||
# Training Engine
|
||||
|
||||
## Hook
|
||||
|
||||
### Introduction
|
||||
|
||||
OpenMMLab abstracts the model training and testing process into `Runner`. Inserting hooks implements the functionality needed at different training and testing nodes in `Runner` (for example, "before/after each training iter", "before/after each validation iter", and other stages). For more on the hook mechanism, see [here](https://www.calltutors.com/blog/what-is-hook).
|
||||
|
||||
The hooks used in `Runner` are divided into two categories:
|
||||
|
||||
- Default hooks
|
||||
|
||||
They implement functionality essential for training. They are defined in the config file by `default_hooks` and passed to `Runner`, which registers them through the [`register_default_hooks`](https://github.com/open-mmlab/mmengine/blob/090104df21acd05a8aadae5a0d743a7da3314f6f/mmengine/runner/runner.py#L1780) method.
|
||||
Hooks have corresponding priorities; the higher the priority, the earlier the hook is called by the runner. If the priorities are the same, the calling order follows the registration order of the hooks.
|
||||
It is not recommended that users modify the priorities of the default hooks; see the [mmengine hooks documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/hook.md) for the definition of hook priorities.
|
||||
Below are the default hooks used in MMSegmentation:
|
||||
|
||||
| Hook | Usage | Priority |
|
||||
| :-----------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------: | :---------------: |
|
||||
| [IterTimerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/iter_timer_hook.py) | Record the time spent on each iteration. | NORMAL (50) |
|
||||
| [LoggerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/logger_hook.py) | Collect log records from different components of `Runner` and output them to downstream targets such as the terminal, JSON files, tensorboard and wandb. | BELOW_NORMAL (60) |
|
||||
| [ParamSchedulerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/param_scheduler_hook.py) | Update some hyperparameters in the optimizer, such as the learning rate and momentum. | LOW (70) |
|
||||
| [CheckpointHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py) | Save checkpoint files periodically. | VERY_LOW (90) |
|
||||
| [DistSamplerSeedHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sampler_seed_hook.py) | Ensure the distributed sampler shuffle is enabled. | NORMAL (50) |
|
||||
| [SegVisualizationHook](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/visualization/local_visualizer.py) | Visualize prediction results during validation and testing. | NORMAL (50) |
|
||||
|
||||
They are configured in the config file as follows:
|
||||
|
||||
```python
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='SegVisualizationHook'))
|
||||
```
|
||||
|
||||
All the default hooks above except `SegVisualizationHook` are implemented in MMEngine. `SegVisualizationHook` is implemented in MMSegmentation and will be introduced later.
|
||||
|
||||
- Custom hooks
|
||||
|
||||
Custom hooks are defined in the config through `custom_hooks`, and `Runner` registers them through the [`register_custom_hooks`](https://github.com/open-mmlab/mmengine/blob/090104df21acd05a8aadae5a0d743a7da3314f6f/mmengine/runner/runner.py#L1852) method.
|
||||
The priority of custom hooks needs to be set in the config file; if it is not set, it defaults to `NORMAL`. Below are some of the custom hooks implemented in MMEngine:
|
||||
|
||||
| Hook | Usage |
|
||||
| :----------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------: |
|
||||
| [EMAHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/ema_hook.py) | Apply Exponential Moving Average (EMA) during model training. |
|
||||
| [EmptyCacheHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/empty_cache_hook.py) | Release all GPU memory not occupied by the cache during training. |
|
||||
| [SyncBuffersHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sync_buffer_hook.py) | Synchronize the parameters in model buffers, such as `running_mean` and `running_var` in BN, at the end of each training epoch. |
|
||||
|
||||
Below is a use case of `EMAHook`: in the config file, add the config of an implemented custom hook as a member of the `custom_hooks` list.
|
||||
|
||||
```python
|
||||
custom_hooks = [
|
||||
dict(type='EMAHook', start_iters=500, priority='NORMAL')
|
||||
]
|
||||
```
|
||||
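If none of the built-in hooks fit, a new hook can be registered and used the same way. Below is a minimal sketch; the hook name and its behavior are hypothetical, while the `Hook` base class, the `HOOKS` registry and the `after_train_iter`/`every_n_train_iters` APIs come from MMEngine:

```python
from mmengine.hooks import Hook

from mmseg.registry import HOOKS


@HOOKS.register_module()
class SimpleLogIterHook(Hook):
    """Hypothetical hook: log a message every `interval` training iters."""

    def __init__(self, interval: int = 100) -> None:
        self.interval = interval

    def after_train_iter(self, runner, batch_idx, data_batch=None,
                         outputs=None) -> None:
        # `every_n_train_iters` is a helper defined on the base Hook class.
        if self.every_n_train_iters(runner, self.interval):
            runner.logger.info(f'Reached iteration {runner.iter + 1}')
```

It could then be enabled with `custom_hooks = [dict(type='SimpleLogIterHook', interval=100)]`.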
|
||||
### SegVisualizationHook
|
||||
|
||||
MMSegmentation implements [`SegVisualizationHook`](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/engine/hooks/visualization_hook.py#L17), which is used to visualize prediction results during validation and testing.
|
||||
`SegVisualizationHook` overrides the `_after_iter` method of the base class `Hook`. During validation or testing, it calls the `add_datasample` method of `visualizer` at the specified iteration interval to draw the semantic segmentation results. The implementation is as follows:
|
||||
|
||||
```python
|
||||
...
|
||||
@HOOKS.register_module()
|
||||
class SegVisualizationHook(Hook):
|
||||
...
|
||||
def _after_iter(self,
|
||||
runner: Runner,
|
||||
batch_idx: int,
|
||||
data_batch: dict,
|
||||
outputs: Sequence[SegDataSample],
|
||||
mode: str = 'val') -> None:
|
||||
...
|
||||
# Exit directly when in the training phase or when self.draw is False
|
||||
if self.draw is False or mode == 'train':
|
||||
return
|
||||
...
|
||||
if self.every_n_inner_iters(batch_idx, self.interval):
|
||||
for output in outputs:
|
||||
img_path = output.img_path
|
||||
img_bytes = self.file_client.get(img_path)
|
||||
img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
|
||||
window_name = f'{mode}_{osp.basename(img_path)}'
|
||||
|
||||
self._visualizer.add_datasample(
|
||||
window_name,
|
||||
img,
|
||||
data_sample=output,
|
||||
show=self.show,
|
||||
wait_time=self.wait_time,
|
||||
step=runner.iter)
|
||||
|
||||
```
|
||||
|
||||
More details about visualization can be found [here](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/visualization.md).
|
||||
|
||||
## Optimizer
|
||||
|
||||
### Optimizer wrapper
|
||||
|
||||
OpenMMLab 2.0 designs an optimizer wrapper that supports different training strategies, including mixed precision training, gradient accumulation and gradient clipping. Users can choose the appropriate training strategy according to their needs.
|
||||
The optimizer wrapper also defines a standard parameter-update process, on top of which users can switch between different training strategies within the same code. For more information, see the [MMEngine optimizer wrapper documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/optim_wrapper.md).
|
||||
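For instance, a minimal sketch that combines several of these strategies at once; `AmpOptimWrapper`, `accumulative_counts` and `clip_grad` are MMEngine options, and the concrete values are illustrative:

```python
# Mixed precision (AmpOptimWrapper), gradient accumulation over 4 iters
# (accumulative_counts) and gradient clipping by norm (clip_grad).
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9),
    accumulative_counts=4,
    clip_grad=dict(max_norm=1.0))
```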
|
||||
MMSegmentation also uses the optimizer wrapper to optimize parameters when training models. Below are common usages in MMSegmentation:
|
||||
|
||||
#### Configuring PyTorch-supported optimizers
|
||||
|
||||
OpenMMLab 2.0 supports all native PyTorch optimizers; see [here](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/optim_wrapper.md#%E7%AE%80%E5%8D%95%E9%85%8D%E7%BD%AE).
|
||||
To set the optimizer used by `Runner` during training in the config file, you need to define `optim_wrapper`. For example, to configure the SGD optimizer:
|
||||
|
||||
```python
|
||||
optim_wrapper = dict(
|
||||
type='OptimWrapper',
|
||||
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
|
||||
clip_grad=None)
|
||||
```
|
||||
|
||||
#### The `paramwise_cfg` argument
|
||||
|
||||
During model training, if you want to set different optimization strategies for different parameters in the optimizer, such as different learning rates or weight decay, you can do so by setting `paramwise_cfg`.
|
||||
|
||||
For example, when training with ViT as the model backbone, the optimizer sets weight decay, but weight decay needs to be turned off for the position embedding, layer normalization and class token parameters. The `optim_wrapper` config is [as follows](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py#L15-L27):
|
||||
|
||||
```python
|
||||
optimizer = dict(
|
||||
type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
|
||||
optim_wrapper = dict(
|
||||
_delete_=True,
|
||||
type='OptimWrapper',
|
||||
optimizer=optimizer,
|
||||
paramwise_cfg=dict(
|
||||
custom_keys={
|
||||
'pos_embed': dict(decay_mult=0.),
|
||||
'cls_token': dict(decay_mult=0.),
|
||||
'norm': dict(decay_mult=0.)
|
||||
}))
|
||||
```
|
||||
|
||||
Here `decay_mult` is the weight-decay coefficient for the corresponding parameters. For more usage of `paramwise_cfg`, see the [MMEngine documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/optim_wrapper.md).
|
||||
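Besides `decay_mult`, `paramwise_cfg` also supports `lr_mult` for scaling the learning rate of matched parameters. A minimal sketch; the key `'head'` is illustrative and matches any parameter name containing it:

```python
# Train any parameter whose name contains 'head' with a 10x learning rate.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
    paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
```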
|
||||
### Optimizer wrapper constructor
|
||||
|
||||
The default optimizer wrapper constructor [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/376251961da47ea8254ab808ae5c51e1430f18dc/mmengine/optim/optimizer/default_constructor.py#L19) builds the optimizer used in training based on the input `optim_wrapper` and the `paramwise_cfg` defined in `optim_wrapper`. When [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/376251961da47ea8254ab808ae5c51e1430f18dc/mmengine/optim/optimizer/default_constructor.py#L19) does not meet your needs, you can customize an optimizer wrapper constructor to configure the hyperparameters.
|
||||
|
||||
MMSegmentation implements [`LearningRateDecayOptimizerConstructor`](https://github.com/open-mmlab/mmsegmentation/blob/b21df463d47447f33c28d9a4f46136ad64d34a40/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py#L104). When training models with ConvNeXt, BEiT or MAE as the backbone, it decays the learning rate of the backbone parameters layer by layer according to the defined decay ratio (`decay_rate`). The config file setting is as follows:
|
||||
|
||||
```python
|
||||
optim_wrapper = dict(
|
||||
_delete_=True,
|
||||
type='AmpOptimWrapper',
|
||||
optimizer=dict(
|
||||
type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
|
||||
paramwise_cfg={
|
||||
'decay_rate': 0.9,
|
||||
'decay_type': 'stage_wise',
|
||||
'num_layers': 12
|
||||
},
|
||||
constructor='LearningRateDecayOptimizerConstructor',
|
||||
loss_scale='dynamic')
|
||||
```
|
||||
|
@ -1 +1,102 @@
|
||||
# Data Structures
|
||||
|
||||
To unify the input and output interfaces between the model and its functional modules, OpenMMLab 2.0 MMEngine defines a set of abstract data structures that implement basic add/delete/get/update operations and support data migration between devices, as well as dict-like and tensor-like operations such as
|
||||
`.cpu()`, `.cuda()`, `.get()` and `.detach()`. See the [MMEngine documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/data_element.md) for details.
|
||||
|
||||
Likewise, MMSegmentation follows the OpenMMLab 2.0 inter-module interface protocol and defines `SegDataSample` to encapsulate the data required by semantic segmentation tasks.
|
||||
|
||||
## Semantic segmentation data SegDataSample
|
||||
|
||||
[SegDataSample](mmseg.structures.SegDataSample) includes three main data fields, `gt_sem_seg`, `pred_sem_seg` and `seg_logits`, which store the annotation, the prediction result and the predicted unnormalized logits, respectively.
|
||||
|
||||
| Field | Type | Description |
|
||||
| -------------- | ------------------------- | ------------------------------- |
|
||||
| gt_sem_seg | [`PixelData`](#pixeldata) | Ground truth of the image. |
|
||||
| pred_sem_seg | [`PixelData`](#pixeldata) | Prediction of the image. |
|
||||
| seg_logits | [`PixelData`](#pixeldata) | Predicted logits of the model before normalization. |
|
||||
|
||||
The following sample code shows how to use `SegDataSample`:
|
||||
|
||||
```python
|
||||
import torch
|
||||
from mmengine.structures import PixelData
|
||||
from mmseg.structures import SegDataSample
|
||||
|
||||
img_meta = dict(img_shape=(4, 4, 3),
|
||||
pad_shape=(4, 4, 3))
|
||||
data_sample = SegDataSample()
|
||||
# Define gt_segmentations to encapsulate the ground truth data
|
||||
gt_segmentations = PixelData(metainfo=img_meta)
|
||||
gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
|
||||
|
||||
# Add and process properties in SegDataSample
|
||||
data_sample.gt_sem_seg = gt_segmentations
|
||||
assert 'gt_sem_seg' in data_sample
|
||||
assert 'data' in data_sample.gt_sem_seg
|
||||
assert 'img_shape' in data_sample.gt_sem_seg.metainfo_keys()
|
||||
print(data_sample.gt_sem_seg.shape)
|
||||
'''
|
||||
(4, 4)
|
||||
'''
|
||||
print(data_sample)
|
||||
'''
|
||||
<SegDataSample(
|
||||
|
||||
META INFORMATION
|
||||
|
||||
DATA FIELDS
|
||||
gt_sem_seg: <PixelData(
|
||||
|
||||
META INFORMATION
|
||||
img_shape: (4, 4, 3)
|
||||
pad_shape: (4, 4, 3)
|
||||
|
||||
DATA FIELDS
|
||||
data: tensor([[[1, 1, 1, 0],
|
||||
[1, 0, 1, 1],
|
||||
[1, 1, 1, 1],
|
||||
[0, 1, 0, 1]]])
|
||||
) at 0x1c2b4156460>
|
||||
) at 0x1c2aae44d60>
|
||||
'''
|
||||
|
||||
# Delete and modify properties in SegDataSample
|
||||
data_sample = SegDataSample()
|
||||
gt_segmentations = PixelData(metainfo=img_meta)
|
||||
gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
|
||||
data_sample.gt_sem_seg = gt_segmentations
|
||||
data_sample.gt_sem_seg.set_metainfo(dict(img_shape=(4,4,9), pad_shape=(4,4,9)))
|
||||
del data_sample.gt_sem_seg.img_shape
|
||||
|
||||
# Tensor-like operations
|
||||
data_sample = SegDataSample()
|
||||
gt_segmentations = PixelData(metainfo=img_meta)
|
||||
gt_segmentations.data = torch.randint(0, 2, (1, 4, 4))
|
||||
cuda_gt_segmentations = gt_segmentations.cuda()
|
||||
cuda_gt_segmentations = gt_segmentations.to('cuda:0')
|
||||
cpu_gt_segmentations = cuda_gt_segmentations.cpu()
|
||||
cpu_gt_segmentations = cuda_gt_segmentations.to('cpu')
|
||||
```
|
||||
|
||||
## Customize new properties in SegDataSample
|
||||
|
||||
If you want to customize new properties in `SegDataSample`, you may refer to the [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) example below:
|
||||
|
||||
```python
|
||||
class SegDataSample(BaseDataElement):
|
||||
...
|
||||
|
||||
@property
|
||||
def xxx_property(self) -> xxxData:
|
||||
return self._xxx_property
|
||||
|
||||
@xxx_property.setter
|
||||
def xxx_property(self, value: xxxData) -> None:
|
||||
self.set_field(value, '_xxx_property', dtype=xxxData)
|
||||
|
||||
@xxx_property.deleter
|
||||
def xxx_property(self) -> None:
|
||||
del self._xxx_property
|
||||
```
|
||||
|
||||
In this way, a new property `xxx_property` is added to `SegDataSample`.
|
||||
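As a concrete illustration of the template above, a (hypothetical) `gt_edge_map` property backed by `PixelData` could be declared like this:

```python
from mmengine.structures import BaseDataElement, PixelData


class SegDataSample(BaseDataElement):

    @property
    def gt_edge_map(self) -> PixelData:
        return self._gt_edge_map

    @gt_edge_map.setter
    def gt_edge_map(self, value: PixelData) -> None:
        # set_field checks the dtype and stores the value on the instance.
        self.set_field(value, '_gt_edge_map', dtype=PixelData)

    @gt_edge_map.deleter
    def gt_edge_map(self) -> None:
        del self._gt_edge_map
```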
|
@ -75,11 +75,6 @@ necks
|
||||
.. automodule:: mmseg.models.necks
|
||||
:members:
|
||||
|
||||
mmseg.ops
|
||||
--------------
|
||||
.. automodule:: mmseg.ops
|
||||
:members:
|
||||
|
||||
mmseg.registry
|
||||
--------------
|
||||
.. automodule:: mmseg.registry
|
||||
|
@ -28,7 +28,7 @@ version_file = '../../mmseg/version.py'
|
||||
|
||||
|
||||
def get_version():
|
||||
with open(version_file, 'r') as f:
|
||||
with open(version_file) as f:
|
||||
exec(compile(f.read(), version_file, 'exec'))
|
||||
return locals()['__version__']
|
||||
|
||||
|
@ -18,13 +18,15 @@ num_ckpts = 0
|
||||
for f in files:
|
||||
url = osp.dirname(f.replace('../../', url_prefix))
|
||||
|
||||
with open(f, 'r') as content_file:
|
||||
with open(f) as content_file:
|
||||
content = content_file.read()
|
||||
|
||||
title = content.split('\n')[0].replace('#', '').strip()
|
||||
ckpts = set(x.lower().strip()
|
||||
for x in re.findall(r'https?://download.*\.pth', content)
|
||||
if 'mmsegmentation' in x)
|
||||
ckpts = {
|
||||
x.lower().strip()
|
||||
for x in re.findall(r'https?://download.*\.pth', content)
|
||||
if 'mmsegmentation' in x
|
||||
}
|
||||
if len(ckpts) == 0:
|
||||
continue
|
||||
|
||||
@ -34,7 +36,7 @@ for f in files:
|
||||
assert len(_papertype) > 0
|
||||
papertype = _papertype[0]
|
||||
|
||||
paper = set([(papertype, title)])
|
||||
paper = {(papertype, title)}
|
||||
|
||||
titles.append(title)
|
||||
num_ckpts += len(ckpts)
|
||||
|
@ -10,6 +10,7 @@ from .version import __version__, version_info
|
||||
MMCV_MIN = '2.0.0rc1'
|
||||
MMCV_MAX = '2.1.0'
|
||||
MMENGINE_MIN = '0.1.0'
|
||||
MMENGINE_MAX = '1.0.0'
|
||||
|
||||
|
||||
def digit_version(version_str: str, length: int = 4):
|
||||
@ -62,10 +63,12 @@ assert (mmcv_min_version <= mmcv_version < mmcv_max_version), \
|
||||
f'Please install mmcv>={mmcv_min_version}, <{mmcv_max_version}.'
|
||||
|
||||
mmengine_min_version = digit_version(MMENGINE_MIN)
|
||||
mmengine_max_version = digit_version(MMENGINE_MAX)
|
||||
mmengine_version = digit_version(mmengine.__version__)
|
||||
|
||||
assert (mmengine_min_version <= mmengine_version), \
|
||||
assert (mmengine_min_version <= mmengine_version < mmengine_max_version), \
|
||||
f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
|
||||
f'Please install mmengine>={mmengine_min_version}.'
|
||||
f'Please install mmengine>={mmengine_min_version}, '\
|
||||
f'<{mmengine_max_version}.'
|
||||
|
||||
__all__ = ['__version__', 'version_info', 'digit_version']
|
||||
|
@ -102,7 +102,7 @@ def _preprare_data(imgs: ImageType, model: BaseSegmentor):
|
||||
is_batch = False
|
||||
|
||||
if isinstance(imgs[0], np.ndarray):
|
||||
cfg.test_pipeline[0].type = 'LoadImageFromNDArray'
|
||||
cfg.test_pipeline[0]['type'] = 'LoadImageFromNDArray'
|
||||
|
||||
# TODO: Consider using the singleton pattern to avoid building
|
||||
# a pipeline for each inference
|
||||
@ -203,9 +203,8 @@ def show_result_pyplot(model: BaseSegmentor,
|
||||
draw_gt=draw_gt,
|
||||
draw_pred=draw_pred,
|
||||
wait_time=wait_time,
|
||||
out_file=out_file,
|
||||
show=show)
|
||||
vis_img = visualizer.get_image()
|
||||
if out_file is not None:
|
||||
mmcv.imwrite(vis_img, out_file)
|
||||
|
||||
return vis_img
|
||||
|
@ -6,20 +6,23 @@ from .cityscapes import CityscapesDataset
|
||||
from .coco_stuff import COCOStuffDataset
|
||||
from .dark_zurich import DarkZurichDataset
|
||||
from .dataset_wrappers import MultiImageMixDataset
|
||||
from .decathlon import DecathlonDataset
|
||||
from .drive import DRIVEDataset
|
||||
from .hrf import HRFDataset
|
||||
from .isaid import iSAIDDataset
|
||||
from .isprs import ISPRSDataset
|
||||
from .lip import LIPDataset
|
||||
from .loveda import LoveDADataset
|
||||
from .night_driving import NightDrivingDataset
|
||||
from .pascal_context import PascalContextDataset, PascalContextDataset59
|
||||
from .potsdam import PotsdamDataset
|
||||
from .stare import STAREDataset
|
||||
from .transforms import (CLAHE, AdjustGamma, LoadAnnotations,
|
||||
LoadImageFromNDArray, PackSegInputs,
|
||||
PhotoMetricDistortion, RandomCrop, RandomCutOut,
|
||||
RandomMosaic, RandomRotate, Rerange, ResizeToMultiple,
|
||||
RGB2Gray, SegRescale)
|
||||
from .transforms import (CLAHE, AdjustGamma, GenerateEdge, LoadAnnotations,
|
||||
LoadBiomedicalAnnotation, LoadBiomedicalData,
|
||||
LoadBiomedicalImageFromFile, LoadImageFromNDArray,
|
||||
PackSegInputs, PhotoMetricDistortion, RandomCrop,
|
||||
RandomCutOut, RandomMosaic, RandomRotate, Rerange,
|
||||
ResizeToMultiple, RGB2Gray, SegRescale)
|
||||
from .voc import PascalVOCDataset
|
||||
|
||||
__all__ = [
|
||||
@ -31,5 +34,7 @@ __all__ = [
|
||||
'LoadAnnotations', 'RandomCrop', 'SegRescale', 'PhotoMetricDistortion',
|
||||
'RandomRotate', 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray',
|
||||
'RandomCutOut', 'RandomMosaic', 'PackSegInputs', 'ResizeToMultiple',
|
||||
'LoadImageFromNDArray'
|
||||
'LoadImageFromNDArray', 'LoadBiomedicalImageFromFile',
|
||||
'LoadBiomedicalAnnotation', 'LoadBiomedicalData', 'GenerateEdge',
|
||||
'DecathlonDataset', 'LIPDataset'
|
||||
]
|
||||
|
@ -85,7 +85,7 @@ class BaseSegDataset(BaseDataset):
|
||||
seg_map_suffix='.png',
|
||||
metainfo: Optional[dict] = None,
|
||||
data_root: Optional[str] = None,
|
||||
data_prefix: dict = dict(img_path=None, seg_map_path=None),
|
||||
data_prefix: dict = dict(img_path='', seg_map_path=''),
|
||||
filter_cfg: Optional[dict] = None,
|
||||
indices: Optional[Union[int, Sequence[int]]] = None,
|
||||
serialize_data: bool = True,
|
||||
@ -132,9 +132,6 @@ class BaseSegDataset(BaseDataset):
|
||||
# if it is not defined
|
||||
updated_palette = self._update_palette()
|
||||
self._metainfo.update(dict(palette=updated_palette))
|
||||
if test_mode:
|
||||
assert self._metainfo.get('classes') is not None, \
|
||||
'dataset metainfo `classes` should be specified when testing'
|
||||
|
||||
# Join paths.
|
||||
if self.data_root is not None:
|
||||
@ -146,6 +143,10 @@ class BaseSegDataset(BaseDataset):
|
||||
if not lazy_init:
|
||||
self.full_init()
|
||||
|
||||
if test_mode:
|
||||
assert self._metainfo.get('classes') is not None, \
|
||||
'dataset metainfo `classes` should be specified when testing'
|
||||
|
||||
@classmethod
|
||||
def get_label_map(cls,
|
||||
new_classes: Optional[Sequence] = None
|
||||
|
96
mmseg/datasets/decathlon.py
Normal file
@ -0,0 +1,96 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
import os.path as osp
|
||||
from typing import List
|
||||
|
||||
from mmengine.fileio import load
|
||||
|
||||
from mmseg.registry import DATASETS
|
||||
from .basesegdataset import BaseSegDataset
|
||||
|
||||
|
||||
@DATASETS.register_module()
|
||||
class DecathlonDataset(BaseSegDataset):
|
||||
"""Dataset for Dacathlon dataset.
|
||||
|
||||
The dataset.json format is shown as follows
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
{
|
||||
"name": "BRATS",
|
||||
"tensorImageSize": "4D",
|
||||
"modality":
|
||||
{
|
||||
"0": "FLAIR",
|
||||
"1": "T1w",
|
||||
"2": "t1gd",
|
||||
"3": "T2w"
|
||||
},
|
||||
"labels": {
|
||||
"0": "background",
|
||||
"1": "edema",
|
||||
"2": "non-enhancing tumor",
|
||||
"3": "enhancing tumour"
|
||||
},
|
||||
"numTraining": 484,
|
||||
"numTest": 266,
|
||||
"training":
|
||||
[
|
||||
{
|
||||
"image": "./imagesTr/BRATS_306.nii.gz"
|
||||
"label": "./labelsTr/BRATS_306.nii.gz"
|
||||
...
|
||||
}
|
||||
]
|
||||
"test":
|
||||
[
|
||||
"./imagesTs/BRATS_557.nii.gz"
|
||||
...
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
def load_data_list(self) -> List[dict]:
|
||||
"""Load annotation from directory or annotation file.
|
||||
|
||||
Returns:
|
||||
list[dict]: All data info of dataset.
|
||||
"""
|
||||
# `self.ann_file` denotes the absolute annotation file path if
|
||||
# `self.root=None` or relative path if `self.root=/path/to/data/`.
|
||||
annotations = load(self.ann_file)
|
||||
if not isinstance(annotations, dict):
|
||||
raise TypeError(f'The annotations loaded from annotation file '
|
||||
f'should be a dict, but got {type(annotations)}!')
|
||||
raw_data_list = annotations[
|
||||
'training'] if not self.test_mode else annotations['test']
|
||||
data_list = []
|
||||
for raw_data_info in raw_data_list:
|
||||
# `[2:]` removes the leading './' from the file path, which would
|
||||
# otherwise break loading from cloud storage.
|
||||
if isinstance(raw_data_info, dict):
|
||||
data_info = dict(
|
||||
img_path=osp.join(self.data_root, raw_data_info['image']
|
||||
[2:]))
|
||||
data_info['seg_map_path'] = osp.join(
|
||||
self.data_root, raw_data_info['label'][2:])
|
||||
else:
|
||||
data_info = dict(
|
||||
img_path=osp.join(self.data_root, raw_data_info)[2:])
|
||||
data_info['label_map'] = self.label_map
|
||||
data_info['reduce_zero_label'] = self.reduce_zero_label
|
||||
data_info['seg_fields'] = []
|
||||
data_list.append(data_info)
|
||||
annotations.pop('training')
|
||||
annotations.pop('test')
|
||||
|
||||
metainfo = copy.deepcopy(annotations)
|
||||
metainfo['classes'] = [*metainfo['labels'].values()]
|
||||
# Meta information load from annotation file will not influence the
|
||||
# existed meta information load from `BaseDataset.METAINFO` and
|
||||
# `metainfo` arguments defined in constructor.
|
||||
for k, v in metainfo.items():
|
||||
self._metainfo.setdefault(k, v)
|
||||
|
||||
return data_list
|
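A minimal usage sketch of the class above; the `data_root` path is illustrative, and `dataset.json` is expected to follow the format documented in the docstring:

```python
from mmseg.datasets import DecathlonDataset

dataset = DecathlonDataset(
    data_root='data/Task01_BrainTumour',  # hypothetical path
    ann_file='dataset.json',
    pipeline=[])
print(len(dataset))  # number of training samples
```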
43
mmseg/datasets/lip.py
Normal file
@ -0,0 +1,43 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from mmseg.registry import DATASETS
|
||||
from .basesegdataset import BaseSegDataset
|
||||
|
||||
|
||||
@DATASETS.register_module()
|
||||
class LIPDataset(BaseSegDataset):
|
||||
"""LIP dataset.
|
||||
|
||||
The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to
|
||||
'.png'.
|
||||
"""
|
||||
METAINFO = dict(
|
||||
classes=('Background', 'Hat', 'Hair', 'Glove', 'Sunglasses',
|
||||
'UpperClothes', 'Dress', 'Coat', 'Socks', 'Pants',
|
||||
'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm',
|
||||
'Right-arm', 'Left-leg', 'Right-leg', 'Left-shoe',
|
||||
'Right-shoe'),
|
||||
palette=(
|
||||
[0, 0, 0],
|
||||
[128, 0, 0],
|
||||
[255, 0, 0],
|
||||
[0, 85, 0],
|
||||
[170, 0, 51],
|
||||
[255, 85, 0],
|
||||
[0, 0, 85],
|
||||
[0, 119, 221],
|
||||
[85, 85, 0],
|
||||
[0, 85, 85],
|
||||
[85, 51, 0],
|
||||
[52, 86, 128],
|
||||
[0, 128, 0],
|
||||
[0, 0, 255],
|
||||
[51, 170, 221],
|
||||
[0, 255, 255],
|
||||
[85, 255, 170],
|
||||
[170, 255, 85],
|
||||
[255, 255, 0],
|
||||
[255, 170, 0],
|
||||
))
|
||||
|
||||
def __init__(self, **kwargs) -> None:
|
||||
super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)
|
@ -1,13 +1,17 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .formatting import PackSegInputs
|
||||
from .loading import LoadAnnotations, LoadImageFromNDArray
|
||||
from .transforms import (CLAHE, AdjustGamma, PhotoMetricDistortion, RandomCrop,
|
||||
RandomCutOut, RandomMosaic, RandomRotate, Rerange,
|
||||
ResizeToMultiple, RGB2Gray, SegRescale)
|
||||
from .loading import (LoadAnnotations, LoadBiomedicalAnnotation,
|
||||
LoadBiomedicalData, LoadBiomedicalImageFromFile,
|
||||
LoadImageFromNDArray)
|
||||
from .transforms import (CLAHE, AdjustGamma, GenerateEdge,
|
||||
PhotoMetricDistortion, RandomCrop, RandomCutOut,
|
||||
RandomMosaic, RandomRotate, Rerange, ResizeToMultiple,
|
||||
RGB2Gray, SegRescale)
|
||||
|
||||
__all__ = [
|
||||
'LoadAnnotations', 'RandomCrop', 'SegRescale', 'PhotoMetricDistortion',
|
||||
'RandomRotate', 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray',
|
||||
'RandomCutOut', 'RandomMosaic', 'PackSegInputs', 'ResizeToMultiple',
|
||||
'LoadImageFromNDArray'
|
||||
'LoadImageFromNDArray', 'LoadBiomedicalImageFromFile',
|
||||
'LoadBiomedicalAnnotation', 'LoadBiomedicalData', 'GenerateEdge'
|
||||
]
|
||||
|
@ -1,12 +1,16 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import warnings
|
||||
from typing import Dict
|
||||
|
||||
import mmcv
|
||||
import mmengine
|
||||
import numpy as np
|
||||
from mmcv.transforms import BaseTransform
|
||||
from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations
|
||||
from mmcv.transforms import LoadImageFromFile
|
||||
|
||||
from mmseg.registry import TRANSFORMS
|
||||
from mmseg.utils import datafrombytes
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
|
||||
@ -168,3 +172,273 @@ class LoadImageFromNDArray(LoadImageFromFile):
|
||||
results['img_shape'] = img.shape[:2]
|
||||
results['ori_shape'] = img.shape[:2]
|
||||
return results
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
|
||||
class LoadBiomedicalImageFromFile(BaseTransform):
|
||||
"""Load an biomedical mage from file.
|
||||
|
||||
Required Keys:
|
||||
|
||||
- img_path
|
||||
|
||||
Added Keys:
|
||||
|
||||
- img (np.ndarray): Biomedical image with shape (N, Z, Y, X) by default,
|
||||
N is the number of modalities, and data type is float32
|
||||
if set to_float32 = True, or float64 if decode_backend is 'nifti' and
|
||||
to_float32 is False.
|
||||
- img_shape
|
||||
- ori_shape
|
||||
|
||||
Args:
|
||||
decode_backend (str): The data decoding backend type. Options are
|
||||
'numpy' and 'nifti', and there is a convention that when the backend is
|
||||
'nifti' the axis of the loaded data is XYZ, and when the backend is
|
||||
'numpy', the axis is ZYX. The data will be transposed if the
|
||||
backend is 'nifti'. Defaults to 'nifti'.
|
||||
to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z.
|
||||
Defaults to False.
|
||||
to_float32 (bool): Whether to convert the loaded image to a float32
|
||||
numpy array. If set to False, the loaded image is a float64 array.
|
||||
Defaults to True.
|
||||
file_client_args (dict): Arguments to instantiate a FileClient.
|
||||
See :class:`mmengine.fileio.FileClient` for details.
|
||||
Defaults to ``dict(backend='disk')``.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
decode_backend: str = 'nifti',
|
||||
to_xyz: bool = False,
|
||||
to_float32: bool = True,
|
||||
file_client_args: dict = dict(backend='disk')
|
||||
) -> None:
|
||||
self.decode_backend = decode_backend
|
||||
self.to_xyz = to_xyz
|
||||
self.to_float32 = to_float32
|
||||
self.file_client_args = file_client_args.copy()
|
||||
self.file_client = mmengine.FileClient(**self.file_client_args)
|
||||
|
||||
def transform(self, results: Dict) -> Dict:
|
||||
"""Functions to load image.
|
||||
|
||||
Args:
|
||||
results (dict): Result dict from :obj:``mmcv.BaseDataset``.
|
||||
|
||||
Returns:
|
||||
dict: The dict contains loaded image and meta information.
|
||||
"""
|
||||
|
||||
filename = results['img_path']
|
||||
|
||||
data_bytes = self.file_client.get(filename)
|
||||
img = datafrombytes(data_bytes, backend=self.decode_backend)
|
||||
|
||||
if self.to_float32:
|
||||
img = img.astype(np.float32)
|
||||
|
||||
if len(img.shape) == 3:
|
||||
img = img[None, ...]
|
||||
|
||||
if self.decode_backend == 'nifti':
|
||||
img = img.transpose(0, 3, 2, 1)
|
||||
|
||||
if self.to_xyz:
|
||||
img = img.transpose(0, 3, 2, 1)
|
||||
|
||||
results['img'] = img
|
||||
results['img_shape'] = img.shape[1:]
|
||||
results['ori_shape'] = img.shape[1:]
|
||||
return results
|
||||
|
||||
def __repr__(self):
|
||||
repr_str = (f'{self.__class__.__name__}('
|
||||
f"decode_backend='{self.decode_backend}', "
|
||||
f'to_xyz={self.to_xyz}, '
|
||||
f'to_float32={self.to_float32}, '
|
||||
f'file_client_args={self.file_client_args})')
|
||||
return repr_str
|
||||
|
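A short usage sketch of the transform above; the file path is illustrative, and the axis order of the loaded array follows the convention described in the docstring:

```python
from mmseg.datasets.transforms import LoadBiomedicalImageFromFile

load = LoadBiomedicalImageFromFile(decode_backend='nifti', to_xyz=False)
results = load(dict(img_path='data/BRATS_306.nii.gz'))  # hypothetical file
print(results['img'].shape)  # (N, Z, Y, X): modalities come first
```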
||||
|
||||
@TRANSFORMS.register_module()
|
||||
class LoadBiomedicalAnnotation(BaseTransform):
|
||||
"""Load ``seg_map`` annotation provided by biomedical dataset.
|
||||
|
||||
The annotation format is as the following:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{
|
||||
'gt_seg_map': np.ndarray (X, Y, Z) or (Z, Y, X)
|
||||
}
|
||||
|
||||
Required Keys:
|
||||
|
||||
- seg_map_path
|
||||
|
||||
Added Keys:
|
||||
|
||||
- gt_seg_map (np.ndarray): Biomedical seg map with shape (Z, Y, X) by
|
||||
default, and data type is float32 if set to_float32 = True, or
|
||||
float64 if decode_backend is 'nifti' and to_float32 is False.
|
||||
|
||||
Args:
|
||||
decode_backend (str): The data decoding backend type. Options are
|
||||
'numpy' and 'nifti', and there is a convention that when the backend is
|
||||
'nifti' the axis of the loaded data is XYZ, and when the backend is
|
||||
'numpy', the axis is ZYX. The data will be transposed if the
|
||||
backend is 'nifti'. Defaults to 'nifti'.
|
||||
to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z.
|
||||
Defaults to False.
|
||||
to_float32 (bool): Whether to convert the loaded seg map to a float32
|
||||
numpy array. If set to False, the loaded seg map is a float64 array.
|
||||
Defaults to True.
|
||||
file_client_args (dict): Arguments to instantiate a FileClient.
|
||||
See :class:`mmengine.fileio.FileClient` for details.
|
||||
Defaults to ``dict(backend='disk')``.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
decode_backend: str = 'nifti',
|
||||
to_xyz: bool = False,
|
||||
to_float32: bool = True,
|
||||
file_client_args: dict = dict(backend='disk')
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.decode_backend = decode_backend
|
||||
self.to_xyz = to_xyz
|
||||
self.to_float32 = to_float32
|
||||
self.file_client_args = file_client_args.copy()
|
||||
self.file_client = mmengine.FileClient(**self.file_client_args)
|
||||
|
||||
def transform(self, results: Dict) -> Dict:
|
||||
"""Functions to load image.
|
||||
|
||||
Args:
|
||||
results (dict): Result dict from :obj:``mmcv.BaseDataset``.
|
||||
|
||||
Returns:
|
||||
dict: The dict contains loaded image and meta information.
|
||||
"""
|
||||
data_bytes = self.file_client.get(results['seg_map_path'])
|
||||
gt_seg_map = datafrombytes(data_bytes, backend=self.decode_backend)
|
||||
|
||||
if self.to_float32:
|
||||
gt_seg_map = gt_seg_map.astype(np.float32)
|
||||
|
||||
if self.decode_backend == 'nifti':
|
||||
gt_seg_map = gt_seg_map.transpose(2, 1, 0)
|
||||
|
||||
if self.to_xyz:
|
||||
gt_seg_map = gt_seg_map.transpose(2, 1, 0)
|
||||
|
||||
results['gt_seg_map'] = gt_seg_map
|
||||
return results
|
||||
|
||||
def __repr__(self):
|
||||
repr_str = (f'{self.__class__.__name__}('
|
||||
f"decode_backend='{self.decode_backend}', "
|
||||
f'to_xyz={self.to_xyz}, '
|
||||
f'to_float32={self.to_float32}, '
|
||||
f'file_client_args={self.file_client_args})')
|
||||
return repr_str
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
|
||||
class LoadBiomedicalData(BaseTransform):
|
||||
"""Load an biomedical image and annotation from file.
|
||||
|
||||
The loading data format is as the following:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{
|
||||
'img': np.ndarray data[:-1, X, Y, Z]
|
||||
'seg_map': np.ndarray data[-1, X, Y, Z]
|
||||
}
|
||||
|
||||
|
||||
Required Keys:
|
||||
|
||||
- img_path
|
||||
|
||||
Added Keys:
|
||||
|
||||
- img (np.ndarray): Biomedical image with shape (N, Z, Y, X) by default,
|
||||
N is the number of modalities.
|
||||
- gt_seg_map (np.ndarray, optional): Biomedical seg map with shape
|
||||
(Z, Y, X) by default.
|
||||
- img_shape
|
||||
- ori_shape
|
||||
|
||||
Args:
|
||||
with_seg (bool): Whether to parse and load the semantic segmentation
|
||||
annotation. Defaults to False.
|
||||
decode_backend (str): The data decoding backend type. Options are
|
||||
'numpy' and 'nifti', and there is a convention that when the backend is
|
||||
'nifti' the axis of the loaded data is XYZ, and when the backend is
|
||||
'numpy', the axis is ZYX. The data will be transposed if the
|
||||
backend is 'nifti'. Defaults to 'nifti'.
|
||||
to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z.
|
||||
Defaults to False.
|
||||
file_client_args (dict): Arguments to instantiate a FileClient.
|
||||
See :class:`mmengine.fileio.FileClient` for details.
|
||||
Defaults to ``dict(backend='disk')``.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
with_seg=False,
|
||||
decode_backend: str = 'numpy',
|
||||
to_xyz: bool = False,
|
||||
file_client_args: dict = dict(backend='disk')
|
||||
) -> None:
|
||||
self.with_seg = with_seg
|
||||
self.decode_backend = decode_backend
|
||||
self.to_xyz = to_xyz
|
||||
self.file_client_args = file_client_args.copy()
|
||||
self.file_client = mmengine.FileClient(**self.file_client_args)
|
||||
|
||||
def transform(self, results: Dict) -> Dict:
|
||||
"""Functions to load image.
|
||||
|
||||
Args:
|
||||
results (dict): Result dict from :obj:``mmcv.BaseDataset``.
|
||||
|
||||
Returns:
|
||||
dict: The dict contains loaded image and meta information.
|
||||
"""
|
||||
data_bytes = self.file_client.get(results['img_path'])
|
||||
data = datafrombytes(data_bytes, backend=self.decode_backend)
|
||||
# img is 4D data (N, X, Y, Z), where N is the number of protocols
|
||||
img = data[:-1, :]
|
||||
|
||||
if self.decode_backend == 'nifti':
|
||||
img = img.transpose(0, 3, 2, 1)
|
||||
|
||||
if self.to_xyz:
|
||||
img = img.transpose(0, 3, 2, 1)
|
||||
|
||||
results['img'] = img
|
||||
results['img_shape'] = img.shape[1:]
|
||||
results['ori_shape'] = img.shape[1:]
|
||||
|
||||
if self.with_seg:
|
||||
gt_seg_map = data[-1, :]
|
||||
if self.decode_backend == 'nifti':
|
||||
gt_seg_map = gt_seg_map.transpose(2, 1, 0)
|
||||
|
||||
if self.to_xyz:
|
||||
gt_seg_map = gt_seg_map.transpose(2, 1, 0)
|
||||
results['gt_seg_map'] = gt_seg_map
|
||||
return results
|
||||
|
||||
def __repr__(self) -> str:
|
||||
repr_str = (f'{self.__class__.__name__}('
|
||||
f'with_seg={self.with_seg}, '
|
||||
f"decode_backend='{self.decode_backend}', "
|
||||
f'to_xyz={self.to_xyz}, '
|
||||
f'file_client_args={self.file_client_args})')
|
||||
return repr_str
|
||||
|
@ -1,7 +1,8 @@
|
||||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
from typing import Sequence, Tuple, Union
|
||||
from typing import Dict, Sequence, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import mmcv
|
||||
import numpy as np
|
||||
from mmcv.transforms.base import BaseTransform
|
||||
@ -59,8 +60,8 @@ class ResizeToMultiple(BaseTransform):
|
||||
if self.interpolation else 'bilinear')
|
||||
|
||||
results['img'] = img
|
||||
results['img_shape'] = img.shape
|
||||
results['pad_shape'] = img.shape
|
||||
results['img_shape'] = img.shape[:2]
|
||||
results['pad_shape'] = img.shape[:2]
|
||||
|
||||
# Align segmentation map to multiple of size divisor.
|
||||
for key in results.get('seg_fields', []):
|
||||
@ -1147,3 +1148,81 @@ class RandomMosaic(BaseTransform):
|
||||
repr_str += f'pad_val={self.pad_val}, '
|
||||
repr_str += f'seg_pad_val={self.seg_pad_val})'
|
||||
return repr_str
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
|
||||
class GenerateEdge(BaseTransform):
|
||||
"""Generate Edge for CE2P approach.
|
||||
|
||||
Edge will be used to calculate loss of
|
||||
`CE2P <https://arxiv.org/abs/1809.05996>`_.
|
||||
|
||||
Modified from https://github.com/liutinglt/CE2P/blob/master/dataset/target_generation.py # noqa:E501
|
||||
|
||||
Required Keys:
|
||||
|
||||
- img_shape
|
||||
- gt_seg_map
|
||||
|
||||
Added Keys:
|
||||
- gt_edge (np.ndarray, uint8): The edge annotation generated from the
|
||||
seg map by extracting border between different semantics.
|
||||
|
||||
Args:
|
||||
edge_width (int): The width of the edge. Defaults to 3.
|
||||
ignore_index (int): Index that will be ignored. Defaults to 255.
|
||||
"""
|
||||
|
||||
def __init__(self, edge_width: int = 3, ignore_index: int = 255) -> None:
|
||||
super().__init__()
|
||||
self.edge_width = edge_width
|
||||
self.ignore_index = ignore_index
|
||||
|
||||
def transform(self, results: Dict) -> Dict:
|
||||
"""Call function to generate edge from segmentation map.
|
||||
|
||||
Args:
|
||||
results (dict): Result dict.
|
||||
|
||||
Returns:
|
||||
dict: Result dict with edge mask.
|
||||
"""
|
||||
h, w = results['img_shape']
|
||||
edge = np.zeros((h, w), dtype=np.uint8)
|
||||
seg_map = results['gt_seg_map']
|
||||
|
||||
# down
|
||||
edge_down = edge[1:h, :]
|
||||
edge_down[(seg_map[1:h, :] != seg_map[:h - 1, :])
|
||||
& (seg_map[1:h, :] != self.ignore_index) &
|
||||
(seg_map[:h - 1, :] != self.ignore_index)] = 1
|
||||
# left
|
||||
edge_left = edge[:, :w - 1]
|
||||
edge_left[(seg_map[:, :w - 1] != seg_map[:, 1:w])
|
||||
& (seg_map[:, :w - 1] != self.ignore_index) &
|
||||
(seg_map[:, 1:w] != self.ignore_index)] = 1
|
||||
# up_left
|
||||
edge_upleft = edge[:h - 1, :w - 1]
|
||||
edge_upleft[(seg_map[:h - 1, :w - 1] != seg_map[1:h, 1:w])
|
||||
& (seg_map[:h - 1, :w - 1] != self.ignore_index) &
|
||||
(seg_map[1:h, 1:w] != self.ignore_index)] = 1
|
||||
# up_right
|
||||
edge_upright = edge[:h - 1, 1:w]
|
||||
edge_upright[(seg_map[:h - 1, 1:w] != seg_map[1:h, :w - 1])
|
||||
& (seg_map[:h - 1, 1:w] != self.ignore_index) &
|
||||
(seg_map[1:h, :w - 1] != self.ignore_index)] = 1
|
||||
|
||||
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
|
||||
(self.edge_width, self.edge_width))
|
||||
edge = cv2.dilate(edge, kernel)
|
||||
|
||||
results['gt_edge'] = edge
|
||||
results['edge_width'] = self.edge_width
|
||||
|
||||
return results
|
||||
|
||||
def __repr__(self):
|
||||
repr_str = self.__class__.__name__
|
||||
repr_str += f'(edge_width={self.edge_width}, '
|
||||
repr_str += f'ignore_index={self.ignore_index})'
|
||||
return repr_str
|
||||
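A small, self-contained sketch of the transform above on a toy segmentation map; with `edge_width=1` the dilation kernel is 1x1, so only the raw border pixels are marked:

```python
import numpy as np

from mmseg.datasets.transforms import GenerateEdge

results = dict(
    img_shape=(4, 4),
    gt_seg_map=np.array([[0, 0, 1, 1],
                         [0, 0, 1, 1],
                         [2, 2, 1, 1],
                         [2, 2, 1, 1]], dtype=np.uint8))
results = GenerateEdge(edge_width=1)(results)
print(results['gt_edge'])  # uint8 mask marking borders between classes
```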
|
@ -204,5 +204,4 @@ class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor):
|
||||
warnings.warn('DeprecationWarning: Layer_decay_rate will '
|
||||
'be deleted, please use decay_rate instead.')
|
||||
paramwise_cfg['decay_rate'] = paramwise_cfg.pop('layer_decay_rate')
|
||||
super(LayerDecayOptimizerConstructor,
|
||||
self).__init__(optim_wrapper_cfg, paramwise_cfg)
|
||||
super().__init__(optim_wrapper_cfg, paramwise_cfg)
|
||||
|
@ -76,9 +76,8 @@ class CitysMetric(BaseMetric):
|
||||
output.putpalette(palette)
|
||||
output.save(png_filename)
|
||||
|
||||
ann_dir = osp.join(
|
||||
data_batch[0]['data_sample']['seg_map_path'].split('val')[0],
|
||||
'val')
|
||||
ann_dir = osp.join(data_samples[0]['seg_map_path'].split('val')[0],
|
||||
'val')
|
||||
self.results.append(ann_dir)
|
||||
|
||||
def compute_metrics(self, results: list) -> Dict[str, float]:
|
||||
@ -86,9 +85,6 @@ class CitysMetric(BaseMetric):
|
||||
|
||||
Args:
|
||||
results (list): Testing results of the dataset.
|
||||
logger (logging.Logger | str | None): Logger used for printing
|
||||
related information during evaluation. Default: None.
|
||||
imgfile_prefix (str | None): The prefix of output image file
|
||||
|
||||
Returns:
|
||||
dict[str: float]: Cityscapes evaluation results.
|
||||
|
@ -51,7 +51,7 @@ class IoUMetric(BaseMetric):
|
||||
"""Process one batch of data and data_samples.
|
||||
|
||||
The processed results should be stored in ``self.results``, which will
|
||||
be used to computed the metrics when all batches have been processed.
|
||||
be used to compute the metrics when all batches have been processed.
|
||||
|
||||
Args:
|
||||
data_batch (dict): A batch of data from the dataloader.
|
||||
@ -212,7 +212,7 @@ class IoUMetric(BaseMetric):
|
||||
metrics = [metrics]
|
||||
allowed_metrics = ['mIoU', 'mDice', 'mFscore']
|
||||
if not set(metrics).issubset(set(allowed_metrics)):
|
||||
raise KeyError('metrics {} is not supported'.format(metrics))
|
||||
raise KeyError(f'metrics {metrics} is not supported')
|
||||
|
||||
all_acc = total_area_intersect.sum() / total_area_label.sum()
|
||||
ret_metrics = OrderedDict({'aAcc': all_acc})
|
||||
|
@ -194,7 +194,7 @@ class BEiTTransformerEncoderLayer(VisionTransformerEncoderLayer):
|
||||
init_values=None):
|
||||
attn_cfg.update(dict(window_size=window_size, qk_scale=None))
|
||||
|
||||
super(BEiTTransformerEncoderLayer, self).__init__(
|
||||
super().__init__(
|
||||
embed_dims=embed_dims,
|
||||
num_heads=num_heads,
|
||||
feedforward_channels=feedforward_channels,
|
||||
@ -214,9 +214,9 @@ class BEiTTransformerEncoderLayer(VisionTransformerEncoderLayer):
|
||||
self.drop_path = build_dropout(
|
||||
dropout_layer) if dropout_layer else nn.Identity()
|
||||
self.gamma_1 = nn.Parameter(
|
||||
init_values * torch.ones((embed_dims)), requires_grad=True)
|
||||
init_values * torch.ones(embed_dims), requires_grad=True)
|
||||
self.gamma_2 = nn.Parameter(
|
||||
init_values * torch.ones((embed_dims)), requires_grad=True)
|
||||
init_values * torch.ones(embed_dims), requires_grad=True)
|
||||
|
||||
def build_attn(self, attn_cfg):
|
||||
self.attn = BEiTAttention(**attn_cfg)
|
||||
@ -287,7 +287,7 @@ class BEiT(BaseModule):
|
||||
pretrained=None,
|
||||
init_values=0.1,
|
||||
init_cfg=None):
|
||||
super(BEiT, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
if isinstance(img_size, int):
|
||||
img_size = to_2tuple(img_size)
|
||||
elif isinstance(img_size, tuple):
|
||||
@ -505,7 +505,7 @@ class BEiT(BaseModule):
|
||||
state_dict = self.resize_rel_pos_embed(checkpoint)
|
||||
self.load_state_dict(state_dict, False)
|
||||
elif self.init_cfg is not None:
|
||||
super(BEiT, self).init_weights()
|
||||
super().init_weights()
|
||||
else:
|
||||
# We only implement the 'jax_impl' initialization implemented at
|
||||
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501
|
||||
@ -551,7 +551,7 @@ class BEiT(BaseModule):
|
||||
return tuple(outs)
|
||||
|
||||
def train(self, mode=True):
|
||||
super(BEiT, self).train(mode)
|
||||
super().train(mode)
|
||||
if mode and self.norm_eval:
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.LayerNorm):
|
||||
|
@ -29,7 +29,7 @@ class SpatialPath(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(SpatialPath, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
assert len(num_channels) == 4, 'Length of input channels \
|
||||
of Spatial Path must be 4!'
|
||||
|
||||
@ -98,7 +98,7 @@ class AttentionRefinementModule(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(AttentionRefinementModule, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.conv_layer = ConvModule(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channel,
|
||||
@ -152,7 +152,7 @@ class ContextPath(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(ContextPath, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
assert len(context_channels) == 3, 'Length of input channels \
|
||||
of Context Path must be 3!'
|
||||
|
||||
@ -228,7 +228,7 @@ class FeatureFusionModule(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(FeatureFusionModule, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.conv1 = ConvModule(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
@ -304,7 +304,7 @@ class BiSeNetV1(BaseModule):
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
|
||||
super(BiSeNetV1, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
assert len(spatial_channels) == 4, 'Length of input channels \
|
||||
of Spatial Path must be 4!'
|
||||
|
||||
|
@ -37,7 +37,7 @@ class DetailBranch(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(DetailBranch, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
detail_branch = []
|
||||
for i in range(len(detail_channels)):
|
||||
if i == 0:
|
||||
@ -126,7 +126,7 @@ class StemBlock(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(StemBlock, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
|
||||
self.conv_first = ConvModule(
|
||||
in_channels=in_channels,
|
||||
@ -207,7 +207,7 @@ class GELayer(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(GELayer, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
mid_channel = in_channels * exp_ratio
|
||||
self.conv1 = ConvModule(
|
||||
in_channels=in_channels,
|
||||
@ -326,7 +326,7 @@ class CEBlock(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(CEBlock, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.gap = nn.Sequential(
|
||||
@ -385,7 +385,7 @@ class SemanticBranch(BaseModule):
|
||||
in_channels=3,
|
||||
exp_ratio=6,
|
||||
init_cfg=None):
|
||||
super(SemanticBranch, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.in_channels = in_channels
|
||||
self.semantic_channels = semantic_channels
|
||||
self.semantic_stages = []
|
||||
@ -458,7 +458,7 @@ class BGALayer(BaseModule):
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(BGALayer, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.out_channels = out_channels
|
||||
self.align_corners = align_corners
|
||||
self.detail_dwconv = nn.Sequential(
|
||||
@ -594,7 +594,7 @@ class BiSeNetV2(BaseModule):
|
||||
dict(
|
||||
type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm'])
|
||||
]
|
||||
super(BiSeNetV2, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.in_channels = in_channels
|
||||
self.out_indices = out_indices
|
||||
self.detail_channels = detail_channels
|
||||
|
@ -25,7 +25,7 @@ class GlobalContextExtractor(nn.Module):
|
||||
"""
|
||||
|
||||
def __init__(self, channel, reduction=16, with_cp=False):
|
||||
super(GlobalContextExtractor, self).__init__()
|
||||
super().__init__()
|
||||
self.channel = channel
|
||||
self.reduction = reduction
|
||||
assert reduction >= 1 and channel >= reduction
|
||||
@ -87,7 +87,7 @@ class ContextGuidedBlock(nn.Module):
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
act_cfg=dict(type='PReLU'),
|
||||
with_cp=False):
|
||||
super(ContextGuidedBlock, self).__init__()
|
||||
super().__init__()
|
||||
self.with_cp = with_cp
|
||||
self.downsample = downsample
|
||||
|
||||
@ -172,7 +172,7 @@ class InputInjection(nn.Module):
|
||||
"""Downsampling module for CGNet."""
|
||||
|
||||
def __init__(self, num_downsampling):
|
||||
super(InputInjection, self).__init__()
|
||||
super().__init__()
|
||||
self.pool = nn.ModuleList()
|
||||
for i in range(num_downsampling):
|
||||
self.pool.append(nn.AvgPool2d(3, stride=2, padding=1))
|
||||
@ -230,7 +230,7 @@ class CGNet(BaseModule):
|
||||
pretrained=None,
|
||||
init_cfg=None):
|
||||
|
||||
super(CGNet, self).__init__(init_cfg)
|
||||
super().__init__(init_cfg)
|
||||
|
||||
assert not (init_cfg and pretrained), \
|
||||
'init_cfg and pretrained cannot be setting at the same time'
|
||||
@ -364,7 +364,7 @@ class CGNet(BaseModule):
|
||||
def train(self, mode=True):
|
||||
"""Convert the model into training mode will keeping the normalization
|
||||
layer freezed."""
|
||||
super(CGNet, self).train(mode)
|
||||
super().train(mode)
|
||||
if mode and self.norm_eval:
|
||||
for m in self.modules():
|
||||
# trick: eval have effect on BatchNorm only
|
||||
|
@ -35,7 +35,7 @@ class DownsamplerBlock(BaseModule):
|
||||
norm_cfg=dict(type='BN', eps=1e-3),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(DownsamplerBlock, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.conv_cfg = conv_cfg
|
||||
self.norm_cfg = norm_cfg
|
||||
self.act_cfg = act_cfg
|
||||
@ -95,7 +95,7 @@ class NonBottleneck1d(BaseModule):
|
||||
norm_cfg=dict(type='BN', eps=1e-3),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(NonBottleneck1d, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
|
||||
self.conv_cfg = conv_cfg
|
||||
self.norm_cfg = norm_cfg
|
||||
@ -168,7 +168,7 @@ class UpsamplerBlock(BaseModule):
|
||||
norm_cfg=dict(type='BN', eps=1e-3),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
super(UpsamplerBlock, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
self.conv_cfg = conv_cfg
|
||||
self.norm_cfg = norm_cfg
|
||||
self.act_cfg = act_cfg
|
||||
@ -242,7 +242,7 @@ class ERFNet(BaseModule):
|
||||
act_cfg=dict(type='ReLU'),
|
||||
init_cfg=None):
|
||||
|
||||
super(ERFNet, self).__init__(init_cfg=init_cfg)
|
||||
super().__init__(init_cfg=init_cfg)
|
||||
assert len(enc_downsample_channels) \
|
||||
== len(dec_upsample_channels)+1, 'Number of downsample\
|
||||
block of encoder does not \
|
||||
|
@ -36,7 +36,7 @@ class LearningToDownsample(nn.Module):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
dw_act_cfg=None):
super(LearningToDownsample, self).__init__()
super().__init__()
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
@ -124,7 +124,7 @@ class GlobalFeatureExtractor(nn.Module):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
align_corners=False):
super(GlobalFeatureExtractor, self).__init__()
super().__init__()
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
@ -220,7 +220,7 @@ class FeatureFusionModule(nn.Module):
dwconv_act_cfg=dict(type='ReLU'),
conv_act_cfg=None,
align_corners=False):
super(FeatureFusionModule, self).__init__()
super().__init__()
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.dwconv_act_cfg = dwconv_act_cfg
@ -340,7 +340,7 @@ class FastSCNN(BaseModule):
dw_act_cfg=None,
init_cfg=None):

super(FastSCNN, self).__init__(init_cfg)
super().__init__(init_cfg)

if init_cfg is None:
self.init_cfg = [
@ -30,7 +30,7 @@ class HRModule(BaseModule):
norm_cfg=dict(type='BN', requires_grad=True),
block_init_cfg=None,
init_cfg=None):
super(HRModule, self).__init__(init_cfg)
super().__init__(init_cfg)
self.block_init_cfg = block_init_cfg
self._check_branches(num_branches, num_blocks, in_channels,
num_channels)
@ -308,7 +308,7 @@ class HRNet(BaseModule):
multiscale_output=True,
pretrained=None,
init_cfg=None):
super(HRNet, self).__init__(init_cfg)
super().__init__(init_cfg)

self.pretrained = pretrained
self.zero_init_residual = zero_init_residual
@ -633,7 +633,7 @@ class HRNet(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode will keeping the normalization
layer freezed."""
super(HRNet, self).train(mode)
super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
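The `train()` overrides touched here all follow the same convention: call the parent `train()`, re-freeze stages, then push BatchNorm layers back into eval mode when `norm_eval` is set, so running statistics stay fixed during fine-tuning. A simplified sketch of the pattern, not the exact HRNet code:

import torch.nn as nn

class FrozenBNBackbone(nn.Module):
    """Sketch: keep BatchNorm statistics frozen while training."""

    def __init__(self, norm_eval=True):
        super().__init__()
        self.norm_eval = norm_eval
        self.stem = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16))

    def train(self, mode=True):
        super().train(mode)  # first flips every submodule to train/eval
        if mode and self.norm_eval:
            for m in self.modules():
                # eval() only changes layers with train-time state, e.g.
                # BatchNorm running statistics and Dropout masks
                if isinstance(m, nn.modules.batchnorm._BatchNorm):
                    m.eval()
        return self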
@ -64,7 +64,7 @@ class ICNet(BaseModule):
dict(type='Constant', val=1, layer='_BatchNorm'),
dict(type='Normal', mean=0.01, layer='Linear')
]
super(ICNet, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)
self.align_corners = align_corners
self.backbone = MODELS.build(backbone_cfg)
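ICNet (and FastSCNN above) supply a default `init_cfg` only when the caller passes `None`, then hand it to `BaseModule`, which later drives `init_weights()`. A rough sketch of that convention, assuming mmengine's `BaseModule` and illustrative initializer choices:

from mmengine.model import BaseModule

class SketchNet(BaseModule):
    def __init__(self, init_cfg=None):
        if init_cfg is None:
            # Default initialization, applied by BaseModule.init_weights().
            init_cfg = [
                dict(type='Kaiming', mode='fan_out', layer='Conv2d'),
                dict(type='Constant', val=1, layer='_BatchNorm'),
                dict(type='Normal', mean=0.01, layer='Linear'),
            ]
        super().__init__(init_cfg=init_cfg)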
@ -100,7 +100,7 @@ class MAE(BEiT):
pretrained=None,
init_values=0.1,
init_cfg=None):
super(MAE, self).__init__(
super().__init__(
img_size=img_size,
patch_size=patch_size,
in_channels=in_channels,
@ -186,7 +186,7 @@ class MAE(BEiT):
state_dict = self.resize_abs_pos_embed(state_dict)
self.load_state_dict(state_dict, False)
elif self.init_cfg is not None:
super(MAE, self).init_weights()
super().init_weights()
else:
# We only implement the 'jax_impl' initialization implemented at
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501
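MAE's `init_weights()` shows the usual precedence in these backbones: load an explicit checkpoint if one is configured, otherwise defer to the base class when any other `init_cfg` is present, and fall back to a hand-written scheme last. A sketch of that control flow; the helper names are hypothetical, only the branching mirrors the diff:

from mmengine.model import BaseModule

class SketchBackbone(BaseModule):
    def init_weights(self):
        # Three-branch precedence (sketch, not the repo's exact code).
        if isinstance(self.init_cfg, dict) and \
                self.init_cfg.get('type') == 'Pretrained':
            # 1) explicit checkpoint: adapt, then load non-strictly,
            #    since shapes (e.g. pos_embed) may differ
            state_dict = self._load_checkpoint()  # hypothetical helper
            self.load_state_dict(state_dict, False)
        elif self.init_cfg is not None:
            # 2) any other init_cfg: defer to BaseModule
            super().init_weights()
        else:
            # 3) hand-written fallback initialization
            self._custom_init()  # hypothetical helper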
@ -44,7 +44,7 @@ class MixFFN(BaseModule):
ffn_drop=0.,
dropout_layer=None,
init_cfg=None):
super(MixFFN, self).__init__(init_cfg)
super().__init__(init_cfg)

self.embed_dims = embed_dims
self.feedforward_channels = feedforward_channels
@ -253,7 +253,7 @@ class TransformerEncoderLayer(BaseModule):
batch_first=True,
sr_ratio=1,
with_cp=False):
super(TransformerEncoderLayer, self).__init__()
super().__init__()

# The ret[0] of build_norm_layer is norm name.
self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]
@ -357,7 +357,7 @@ class MixVisionTransformer(BaseModule):
pretrained=None,
init_cfg=None,
with_cp=False):
super(MixVisionTransformer, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)

assert not (init_cfg and pretrained), \
'init_cfg and pretrained cannot be set at the same time'
@ -433,7 +433,7 @@ class MixVisionTransformer(BaseModule):
normal_init(
m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0)
else:
super(MixVisionTransformer, self).init_weights()
super().init_weights()

def forward(self, x):
outs = []
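The comment kept in the TransformerEncoderLayer hunk ("The ret[0] of build_norm_layer is norm name") refers to mmcv's `build_norm_layer`, which returns a `(name, layer)` tuple; the code indexes `[1]` to keep only the module. A quick illustration, assuming `mmcv.cnn` is installed:

from mmcv.cnn import build_norm_layer

norm_cfg = dict(type='LN')
name, layer = build_norm_layer(norm_cfg, 64)  # e.g. ('ln', LayerNorm(64))
# Equivalent one-liner used throughout the diff:
norm1 = build_norm_layer(norm_cfg, 64)[1]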
@ -63,7 +63,7 @@ class MobileNetV2(BaseModule):
with_cp=False,
pretrained=None,
init_cfg=None):
super(MobileNetV2, self).__init__(init_cfg)
super().__init__(init_cfg)

self.pretrained = pretrained
assert not (init_cfg and pretrained), \
@ -189,7 +189,7 @@ class MobileNetV2(BaseModule):
param.requires_grad = False

def train(self, mode=True):
super(MobileNetV2, self).train(mode)
super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
@ -81,7 +81,7 @@ class MobileNetV3(BaseModule):
with_cp=False,
pretrained=None,
init_cfg=None):
super(MobileNetV3, self).__init__(init_cfg)
super().__init__(init_cfg)

self.pretrained = pretrained
assert not (init_cfg and pretrained), \
@ -175,7 +175,7 @@ class MobileNetV3(BaseModule):
act_cfg=dict(type=act),
with_cp=self.with_cp)
in_channels = out_channels
layer_name = 'layer{}'.format(i + 1)
layer_name = f'layer{i + 1}'
self.add_module(layer_name, layer)
layers.append(layer_name)

@ -192,7 +192,7 @@ class MobileNetV3(BaseModule):
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=dict(type='HSwish'))
layer_name = 'layer{}'.format(len(layer_setting) + 1)
layer_name = f'layer{len(layer_setting) + 1}'
self.add_module(layer_name, layer)
layers.append(layer_name)

@ -259,7 +259,7 @@ class MobileNetV3(BaseModule):
param.requires_grad = False

def train(self, mode=True):
super(MobileNetV3, self).train(mode)
super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
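Besides the `super()` cleanup, the MobileNetV3 hunks swap `str.format` for f-strings, which read better because the expression sits inline. The two forms are equivalent:

i = 0
layer_name = 'layer{}'.format(i + 1)  # old
assert layer_name == f'layer{i + 1}'  # new, evaluates i + 1 inline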
@ -69,7 +69,7 @@ class SplitAttentionConv2d(nn.Module):
conv_cfg=None,
norm_cfg=dict(type='BN'),
dcn=None):
super(SplitAttentionConv2d, self).__init__()
super().__init__()
inter_channels = max(in_channels * radix // reduction_factor, 32)
self.radix = radix
self.groups = groups
@ -174,7 +174,7 @@ class Bottleneck(_Bottleneck):
avg_down_stride=True,
**kwargs):
"""Bottleneck block for ResNeSt."""
super(Bottleneck, self).__init__(inplanes, planes, **kwargs)
super().__init__(inplanes, planes, **kwargs)

if groups == 1:
width = self.planes
@ -304,7 +304,7 @@ class ResNeSt(ResNetV1d):
self.radix = radix
self.reduction_factor = reduction_factor
self.avg_down_stride = avg_down_stride
super(ResNeSt, self).__init__(**kwargs)
super().__init__(**kwargs)

def make_res_layer(self, **kwargs):
"""Pack all blocks in a stage into a ``ResLayer``."""
@ -29,7 +29,7 @@ class BasicBlock(BaseModule):
dcn=None,
plugins=None,
init_cfg=None):
super(BasicBlock, self).__init__(init_cfg)
super().__init__(init_cfg)
assert dcn is None, 'Not implemented yet.'
assert plugins is None, 'Not implemented yet.'

@ -118,7 +118,7 @@ class Bottleneck(BaseModule):
dcn=None,
plugins=None,
init_cfg=None):
super(Bottleneck, self).__init__(init_cfg)
super().__init__(init_cfg)
assert style in ['pytorch', 'caffe']
assert dcn is None or isinstance(dcn, dict)
assert plugins is None or isinstance(plugins, list)
@ -418,7 +418,7 @@ class ResNet(BaseModule):
zero_init_residual=True,
pretrained=None,
init_cfg=None):
super(ResNet, self).__init__(init_cfg)
super().__init__(init_cfg)
if depth not in self.arch_settings:
raise KeyError(f'invalid depth {depth} for resnet')

@ -676,7 +676,7 @@ class ResNet(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode while keep normalization layer
freezed."""
super(ResNet, self).train(mode)
super().train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
@ -696,8 +696,7 @@ class ResNetV1c(ResNet):
"""

def __init__(self, **kwargs):
super(ResNetV1c, self).__init__(
deep_stem=True, avg_down=False, **kwargs)
super().__init__(deep_stem=True, avg_down=False, **kwargs)


@MODELS.register_module()
@ -710,5 +709,4 @@ class ResNetV1d(ResNet):
"""

def __init__(self, **kwargs):
super(ResNetV1d, self).__init__(
deep_stem=True, avg_down=True, **kwargs)
super().__init__(deep_stem=True, avg_down=True, **kwargs)
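With the two-argument form gone, the thin ResNet variants collapse to one line: each subclass only pins its defining flags and forwards everything else. A sketch of the forwarding idiom with hypothetical names:

class Net:
    def __init__(self, deep_stem=False, avg_down=False, depth=50):
        self.deep_stem, self.avg_down, self.depth = deep_stem, avg_down, depth

class NetV1d(Net):
    def __init__(self, **kwargs):
        # Pin the variant-defining flags, pass everything else through.
        super().__init__(deep_stem=True, avg_down=True, **kwargs)

net = NetV1d(depth=101)
assert net.deep_stem and net.avg_down and net.depth == 101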
@ -23,7 +23,7 @@ class Bottleneck(_Bottleneck):
base_width=4,
base_channels=64,
**kwargs):
super(Bottleneck, self).__init__(inplanes, planes, **kwargs)
super().__init__(inplanes, planes, **kwargs)

if groups == 1:
width = self.planes
@ -139,7 +139,7 @@ class ResNeXt(ResNet):
def __init__(self, groups=1, base_width=4, **kwargs):
self.groups = groups
self.base_width = base_width
super(ResNeXt, self).__init__(**kwargs)
super().__init__(**kwargs)

def make_res_layer(self, **kwargs):
"""Pack all blocks in a stage into a ``ResLayer``"""
@ -35,7 +35,7 @@ class STDCModule(BaseModule):
num_convs=4,
fusion_type='add',
init_cfg=None):
super(STDCModule, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)
assert num_convs > 1
assert fusion_type in ['add', 'cat']
self.stride = stride
@ -155,7 +155,7 @@ class FeatureFusionModule(BaseModule):
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
super(FeatureFusionModule, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)
channels = out_channels // scale_factor
self.conv0 = ConvModule(
in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg)
@ -240,7 +240,7 @@ class STDCNet(BaseModule):
with_final_conv=False,
pretrained=None,
init_cfg=None):
super(STDCNet, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)
assert stdc_type in self.arch_settings, \
f'invalid structure {stdc_type} for STDCNet.'
assert bottleneck_type in ['add', 'cat'],\
@ -370,7 +370,7 @@ class STDCContextPathNet(BaseModule):
align_corners=None,
norm_cfg=dict(type='BN'),
init_cfg=None):
super(STDCContextPathNet, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)
self.backbone = MODELS.build(backbone_cfg)
self.arms = ModuleList()
self.convs = ModuleList()
@ -326,7 +326,7 @@ class SwinBlock(BaseModule):
with_cp=False,
init_cfg=None):

super(SwinBlock, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)

self.with_cp = with_cp

@ -561,7 +561,7 @@ class SwinTransformer(BaseModule):
else:
raise TypeError('pretrained must be a str or None')

super(SwinTransformer, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)

num_layers = len(depths)
self.out_indices = out_indices
@ -636,7 +636,7 @@ class SwinTransformer(BaseModule):

def train(self, mode=True):
"""Convert the model into training mode while keep layers freezed."""
super(SwinTransformer, self).train(mode)
super().train(mode)
self._freeze_stages()

def _freeze_stages(self):
@ -37,7 +37,7 @@ class TIMMBackbone(BaseModule):
):
if timm is None:
raise RuntimeError('timm is not installed')
super(TIMMBackbone, self).__init__(init_cfg)
super().__init__(init_cfg)
if 'norm_layer' in kwargs:
kwargs['norm_layer'] = MMENGINE_MODELS.get(kwargs['norm_layer'])
self.timm_model = timm.create_model(
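TIMMBackbone guards the optional `timm` import and then delegates to `timm.create_model`. A hedged usage sketch of the same guard; the wrapper function and model name are illustrative, and running it requires `pip install timm`:

try:
    import timm
except ImportError:
    timm = None

def build_timm_backbone(model_name, **kwargs):
    if timm is None:
        raise RuntimeError('timm is not installed')
    # features_only=True makes the model return intermediate feature maps,
    # which is what a segmentation backbone consumes.
    return timm.create_model(model_name, features_only=True, **kwargs)

# backbone = build_timm_backbone('resnet18', pretrained=False)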
@ -62,7 +62,7 @@ class GlobalSubsampledAttention(EfficientMultiheadAttention):
norm_cfg=dict(type='LN'),
sr_ratio=1,
init_cfg=None):
super(GlobalSubsampledAttention, self).__init__(
super().__init__(
embed_dims,
num_heads,
attn_drop=attn_drop,
@ -112,7 +112,7 @@ class GSAEncoderLayer(BaseModule):
norm_cfg=dict(type='LN'),
sr_ratio=1.,
init_cfg=None):
super(GSAEncoderLayer, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)

self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1]
self.attn = GlobalSubsampledAttention(
@ -172,7 +172,7 @@ class LocallyGroupedSelfAttention(BaseModule):
proj_drop_rate=0.,
window_size=1,
init_cfg=None):
super(LocallyGroupedSelfAttention, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)

assert embed_dims % num_heads == 0, f'dim {embed_dims} should be ' \
f'divided by num_heads ' \
@ -284,7 +284,7 @@ class LSAEncoderLayer(BaseModule):
window_size=1,
init_cfg=None):

super(LSAEncoderLayer, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)

self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1]
self.attn = LocallyGroupedSelfAttention(embed_dims, num_heads,
@ -325,7 +325,7 @@ class ConditionalPositionEncoding(BaseModule):
"""

def __init__(self, in_channels, embed_dims=768, stride=1, init_cfg=None):
super(ConditionalPositionEncoding, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)
self.proj = nn.Conv2d(
in_channels,
embed_dims,
@ -401,7 +401,7 @@ class PCPVT(BaseModule):
norm_after_stage=False,
pretrained=None,
init_cfg=None):
super(PCPVT, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)
assert not (init_cfg and pretrained), \
'init_cfg and pretrained cannot be set at the same time'
if isinstance(pretrained, str):
@ -471,7 +471,7 @@ class PCPVT(BaseModule):

def init_weights(self):
if self.init_cfg is not None:
super(PCPVT, self).init_weights()
super().init_weights()
else:
for m in self.modules():
if isinstance(m, nn.Linear):
@ -563,11 +563,11 @@ class SVT(PCPVT):
norm_after_stage=True,
pretrained=None,
init_cfg=None):
super(SVT, self).__init__(in_channels, embed_dims, patch_sizes,
strides, num_heads, mlp_ratios, out_indices,
qkv_bias, drop_rate, attn_drop_rate,
drop_path_rate, norm_cfg, depths, sr_ratios,
norm_after_stage, pretrained, init_cfg)
super().__init__(in_channels, embed_dims, patch_sizes, strides,
num_heads, mlp_ratios, out_indices, qkv_bias,
drop_rate, attn_drop_rate, drop_path_rate, norm_cfg,
depths, sr_ratios, norm_after_stage, pretrained,
init_cfg)
# transformer encoder
dpr = [
x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
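The SVT hunk only re-wraps a long positional `super().__init__(...)` call; the argument order is unchanged, which is what makes the refactor safe. With this many positional parameters, keyword arguments would be more robust against signature changes. A sketch of the call inside `SVT.__init__`, using the parameter names visible in the diff:

super().__init__(
    in_channels=in_channels, embed_dims=embed_dims,
    patch_sizes=patch_sizes, strides=strides, num_heads=num_heads,
    mlp_ratios=mlp_ratios, out_indices=out_indices, qkv_bias=qkv_bias,
    drop_rate=drop_rate, attn_drop_rate=attn_drop_rate,
    drop_path_rate=drop_path_rate, norm_cfg=norm_cfg, depths=depths,
    sr_ratios=sr_ratios, norm_after_stage=norm_after_stage,
    pretrained=pretrained, init_cfg=init_cfg)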
@ -53,7 +53,7 @@ class BasicConvBlock(nn.Module):
act_cfg=dict(type='ReLU'),
dcn=None,
plugins=None):
super(BasicConvBlock, self).__init__()
super().__init__()
assert dcn is None, 'Not implemented yet.'
assert plugins is None, 'Not implemented yet.'

@ -112,7 +112,7 @@ class DeconvModule(nn.Module):
*,
kernel_size=4,
scale_factor=2):
super(DeconvModule, self).__init__()
super().__init__()

assert (kernel_size - scale_factor >= 0) and\
(kernel_size - scale_factor) % 2 == 0,\
@ -191,7 +191,7 @@ class InterpConv(nn.Module):
padding=0,
upsample_cfg=dict(
scale_factor=2, mode='bilinear', align_corners=False)):
super(InterpConv, self).__init__()
super().__init__()

self.with_cp = with_cp
conv = ConvModule(
@ -298,7 +298,7 @@ class UNet(BaseModule):
plugins=None,
pretrained=None,
init_cfg=None):
super(UNet, self).__init__(init_cfg)
super().__init__(init_cfg)

self.pretrained = pretrained
assert not (init_cfg and pretrained), \
@ -396,7 +396,7 @@ class UNet(BaseModule):
act_cfg=act_cfg,
dcn=None,
plugins=None))
self.encoder.append((nn.Sequential(*enc_conv_block)))
self.encoder.append(nn.Sequential(*enc_conv_block))
in_channels = base_channels * 2**i

def forward(self, x):
@ -415,7 +415,7 @@ class UNet(BaseModule):
def train(self, mode=True):
"""Convert the model into training mode while keep normalization layer
freezed."""
super(UNet, self).train(mode)
super().train(mode)
if mode and self.norm_eval:
for m in self.modules():
# trick: eval have effect on BatchNorm only
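The UNet hunk also drops a pair of redundant parentheses: `append((x))` and `append(x)` are identical, since parentheses without a trailing comma are pure grouping and do not create a tuple. A tiny demonstration:

blocks = [1, 2]
out = []
out.append((blocks[0]))  # extra parens are grouping, not a tuple
out.append(blocks[1])
assert out == [1, 2]     # a one-element tuple would need a comma: (blocks[0],)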
@ -60,7 +60,7 @@ class TransformerEncoderLayer(BaseModule):
attn_cfg=dict(),
ffn_cfg=dict(),
with_cp=False):
super(TransformerEncoderLayer, self).__init__()
super().__init__()

self.norm1_name, norm1 = build_norm_layer(
norm_cfg, embed_dims, postfix=1)
@ -197,7 +197,7 @@ class VisionTransformer(BaseModule):
with_cp=False,
pretrained=None,
init_cfg=None):
super(VisionTransformer, self).__init__(init_cfg=init_cfg)
super().__init__(init_cfg=init_cfg)

if isinstance(img_size, int):
img_size = to_2tuple(img_size)
@ -315,7 +315,7 @@ class VisionTransformer(BaseModule):

load_state_dict(self, state_dict, strict=False, logger=None)
elif self.init_cfg is not None:
super(VisionTransformer, self).init_weights()
super().init_weights()
else:
# We only implement the 'jax_impl' initialization implemented at
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501
@ -335,7 +335,7 @@ class VisionTransformer(BaseModule):
constant_init(m, val=1.0, bias=0.)

def _pos_embeding(self, patched_img, hw_shape, pos_embed):
"""Positiong embeding method.
"""Positioning embeding method.

Resize the pos_embed, if the input image size doesn't match
the training size.
@ -431,7 +431,7 @@ class VisionTransformer(BaseModule):
return tuple(outs)

def train(self, mode=True):
super(VisionTransformer, self).train(mode)
super().train(mode)
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, nn.LayerNorm):
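The `_pos_embeding` docstring fix above touches the method that resizes position embeddings when the input image size differs from the training size. A common way to do this is to reshape the embeddings to a 2D grid, interpolate, and flatten back; the following is a simplified sketch, not the repo's exact implementation, which also has to handle the class token:

import torch
import torch.nn.functional as F

def resize_pos_embed(pos_embed, src_hw, dst_hw, mode='bicubic'):
    """pos_embed: (1, src_h*src_w, C) -> (1, dst_h*dst_w, C)."""
    src_h, src_w = src_hw
    dst_h, dst_w = dst_hw
    c = pos_embed.shape[2]
    # (1, N, C) -> (1, C, H, W) so F.interpolate can resize spatially
    grid = pos_embed.reshape(1, src_h, src_w, c).permute(0, 3, 1, 2)
    grid = F.interpolate(grid, size=(dst_h, dst_w), mode=mode,
                         align_corners=False)
    return grid.permute(0, 2, 3, 1).reshape(1, dst_h * dst_w, c)

# pe = torch.zeros(1, 14 * 14, 768)
# pe2 = resize_pos_embed(pe, (14, 14), (16, 16))  # -> (1, 256, 768)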