mmsegmentation/mmseg/datasets/pipelines/transforms.py

1043 lines
36 KiB
Python
Raw Normal View History

# Copyright (c) OpenMMLab. All rights reserved.
2020-07-07 20:52:19 +08:00
import mmcv
import numpy as np
from mmcv.utils import deprecated_api_warning, is_tuple_of
2020-07-07 20:52:19 +08:00
from numpy import random
from ..builder import PIPELINES
[Feature] Add segformer decode head and related train config (#599) * [Feature]Segformer re-implementation * Using act_cfg and norm_cfg to control activation and normalization * Split this PR into several little PRs * Fix lint error * Remove SegFormerHead * [Feature] Add segformer decode head and related train config * Add ade20K trainval support for segformer 1. Add related train and val configs; 2. Add AlignedResize; * Set arg: find_unused_parameters = True * parameters init refactor * 1. Refactor segformer backbone parameters init; 2. Remove rebundant functions and unit tests; * Remove rebundant codes * Replace Linear Layer to 1X1 Conv * Use nn.ModuleList to refactor segformer head. * Remove local to_xtuple * 1. Remove rebundant codes; 2. Modify module name; * Refactor the backbone of segformer using mmcv.cnn.bricks.transformer.py * Fix some code logic bugs. * Add mit_convert.py to match pretrain keys of segformer. * Resolve some comments. * 1. Add some assert to ensure right params; 2. Support flexible peconv position; * Add pe_index assert and fix unit test. * 1. Add doc string for MixVisionTransformer; 2. Add some unit tests for MixVisionTransformer; * Use hw_shape to pass shape of feature map. * 1. Fix doc string of MixVisionTransformer; 2. Simplify MixFFN; 3. Modify H, W to hw_shape; * Add more unit tests. * Add doc string for shape convertion functions. * Add some unit tests to improve code coverage. * Fix Segformer backbone pretrain weights match bug. * Modify configs of segformer. * resolve the shape convertion functions doc string. * Add pad_to_patch_size arg. * Support progressive test with fewer memory cost. * Modify default value of pad_to_patch_size arg. * Temp code * Using processor to refactor evaluation workflow. * refactor eval hook. * Fix process bar. * Fix middle save argument. * Modify some variable name of dataset evaluate api. * Modify some viriable name of eval hook. * Fix some priority bugs of eval hook. * Fix some bugs about model loading and eval hook. * Add ade20k 640x640 dataset. * Fix related segformer configs. * Depreciated efficient_test. * Fix training progress blocked by eval hook. * Depreciated old test api. * Modify error patch size. * Fix pretrain of mit_b0 * Fix the test api error. * Modify dataset base config. * Fix test api error. * Modify outer api. * Build a sampler test api. * TODO: Refactor format_results. * Modify variable names. * Fix num_classes bug. * Fix sampler index bug. * Fix grammaly bug. * Add part of benchmark results. * Support batch sampler. * More readable test api. * Remove some command arg and fix eval hook bug. * Support format-only arg. * Modify format_results of datasets. * Modify tool which use test apis. * Update readme. * Update readme of segformer. * Updata readme of segformer. * Update segformer readme and fix segformer mit_b4. * Update readme of segformer. * Clean AlignedResize related config. * Clean code from pr #709 * Clean code from pr #709 * Add 512x512 segformer_mit-b5. * Fix lint. * Fix some segformer head bugs. * Add segformer unit tests. * Replace AlignedResize to ResizeToMultiple. * Modify readme of segformer. * Fix bug of ResizeToMultiple. * Add ResizeToMultiple unit tests. * Resolve conflict. * Simplify the implementation of ResizeToMultiple. * Update test results. * Fix multi-scale test error when resize_ratio=1.75 and input size=640x640. * Update segformer results. * Update Segformer results. * Fix some url bugs and pipelines bug. * Move ckpt convertion to tools. * Add segformer official pretrain weights usage. * Clean redundant codes. * Remove redundant codes. * Unfied format. * Add description for segformer converter. * Update workers.
2021-08-13 13:31:19 +08:00
@PIPELINES.register_module()
class ResizeToMultiple(object):
"""Resize images & seg to multiple of divisor.
Args:
size_divisor (int): images and gt seg maps need to resize to multiple
of size_divisor. Default: 32.
interpolation (str, optional): The interpolation mode of image resize.
Default: None
"""
def __init__(self, size_divisor=32, interpolation=None):
self.size_divisor = size_divisor
self.interpolation = interpolation
def __call__(self, results):
"""Call function to resize images, semantic segmentation map to
multiple of size divisor.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Resized results, 'img_shape', 'pad_shape' keys are updated.
"""
# Align image to multiple of size divisor.
img = results['img']
img = mmcv.imresize_to_multiple(
img,
self.size_divisor,
scale_factor=1,
interpolation=self.interpolation
if self.interpolation else 'bilinear')
results['img'] = img
results['img_shape'] = img.shape
results['pad_shape'] = img.shape
# Align segmentation map to multiple of size divisor.
for key in results.get('seg_fields', []):
gt_seg = results[key]
gt_seg = mmcv.imresize_to_multiple(
gt_seg,
self.size_divisor,
scale_factor=1,
interpolation='nearest')
results[key] = gt_seg
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += (f'(size_divisor={self.size_divisor}, '
f'interpolation={self.interpolation})')
return repr_str
2020-07-07 20:52:19 +08:00
@PIPELINES.register_module()
class Resize(object):
"""Resize images & seg.
This transform resizes the input image to some scale. If the input dict
contains the key "scale", then the scale in the input dict is used,
otherwise the specified scale in the init method is used.
``img_scale`` can be None, a tuple (single-scale) or a list of tuple
(multi-scale). There are 4 multiscale modes:
- ``ratio_range is not None``:
1. When img_scale is None, img_scale is the shape of image in results
(img_scale = results['img'].shape[:2]) and the image is resized based
on the original size. (mode 1)
2. When img_scale is a tuple (single-scale), randomly sample a ratio from
the ratio range and multiply it with the image scale. (mode 2)
2020-07-07 20:52:19 +08:00
- ``ratio_range is None and multiscale_mode == "range"``: randomly sample a
scale from the a range. (mode 3)
2020-07-07 20:52:19 +08:00
- ``ratio_range is None and multiscale_mode == "value"``: randomly sample a
scale from multiple scales. (mode 4)
2020-07-07 20:52:19 +08:00
Args:
img_scale (tuple or list[tuple]): Images scales for resizing.
[Feature] Official implementation of SETR (#531) * Adjust vision transformer backbone architectures; * Add DropPath, trunc_normal_ for VisionTransformer implementation; * Add class token buring intermediate period and remove it during final period; * Fix some parameters loss bug; * * Store intermediate token features and impose no processes on them; * Remove class token and reshape entire token feature from NLC to NCHW; * Fix some doc error * Add a arg for VisionTransformer backbone to control if input class token into transformer; * Add stochastic depth decay rule for DropPath; * * Fix output bug when input_cls_token=False; * Add related unit test; * Re-implement of SETR * Add two head -- SETRUPHead (Naive, PUP) & SETRMLAHead (MLA); * * Modify some docs of heads of SETR; * Add MLA auxiliary head of SETR; * * Modify some arg of setr heads; * Add unit test for setr heads; * * Add 768x768 cityscapes dataset config; * Add Backbone: SETR -- Backbone: MLA, PUP, Naive; * Add SETR cityscapes training & testing config; * * Fix the low code coverage of unit test about heads of setr; * Remove some rebundant error capture; * * Add pascal context dataset & ade20k dataset config; * Modify auxiliary head relative config; * Modify folder structure. * add setr * modify vit * Fix the test_cfg arg position; * Fix some learning schedule bug; * optimize setr code * Add arg: final_reshape to control if converting output feature information from NLC to NCHW; * Fix the default value of final_reshape; * Modify arg: final_reshape to arg: out_shape; * Fix some unit test bug; * Add MLA neck; * Modify setr configs to add MLA neck; * Modify MLA decode head to remove rebundant structure; * Remove some rebundant files. * * Fix the code style bug; * Remove some rebundant files; * Modify some unit tests of SETR; * Ignoring CityscapesCoarseDataset and MapillaryDataset. * Fix the activation function loss bug; * Fix the img_size bug of SETR_PUP_ADE20K * * Fix the lint bug of transformers.py; * Add mla neck unit test; * Convert vit of setr out shape from NLC to NCHW. * * Modify Resize action of data pipeline; * Fix deit related bug; * Set find_unused_parameters=False for pascal context dataset; * Remove arg: find_unused_parameters which is False by default. * Error auxiliary head of PUP deit * Remove the minimal restrict of slide inference. * Modify doc string of Resize * Seperate this part of code to a new PR #544 * * Remove some rebundant codes; * Modify unit tests of SETR heads; * Fix the tuple in_channels of mla_deit. * Modify code style * Move detailed definition of auxiliary head into model config dict; * Add some setr config for default cityscapes.py; * Fix the doc string of SETR head; * Modify implementation of SETR Heads * Remove setr aux head and use fcn head to replace it; * Remove arg: img_size and remove last interpolate op of heads; * Rename arg: conv3x3_conv1x1 to kernel_size of SETRUPHead; * non-square input support for setr heads * Modify config argument for above commits * Remove norm_layer argument of SETRMLAHead * Add mla_align_corners for MLAModule interpolate * [Refactor]Refactor of SETRMLAHead * Modify Head implementation; * Modify Head unit test; * Modify related config file; * [Refactor]MLA Neck * Fix config bug * [Refactor]SETR Naive Head and SETR PUP Head * [Fix]Fix the lack of arg: act_cfg and arg: norm_cfg * Fix config error * Refactor of SETR MLA, Naive, PUP heads. * Modify some attribute name of SETR Heads. * Modify setr configs to adapt new vit code. * Fix trunc_normal_ bug * Parameters init adjustment. * Remove redundant doc string of SETRUPHead * Fix pretrained bug * [Fix] Fix vit init bug * Add some vit unit tests * Modify module import * Remove norm from PatchEmbed * Fix pretrain weights bug * Modify pretrained judge * Fix some gradient backward bugs. * Add some unit tests to improve code cov * Fix init_weights of setr up head * Add DropPath in FFN * Finish benchmark of SETR 1. Add benchmark information into README.MD of SETR; 2. Fix some name bugs of vit; * Remove DropPath implementation and use DropPath from mmcv. * Modify out_indices arg * Fix out_indices bug. * Remove cityscapes base dataset config. Co-authored-by: sennnnn <201730271412@mail.scut.edu.cn> Co-authored-by: CuttlefishXuan <zhaoxinxuan1997@gmail.com>
2021-06-24 00:39:29 +08:00
Default:None.
2020-07-07 20:52:19 +08:00
multiscale_mode (str): Either "range" or "value".
[Feature] Official implementation of SETR (#531) * Adjust vision transformer backbone architectures; * Add DropPath, trunc_normal_ for VisionTransformer implementation; * Add class token buring intermediate period and remove it during final period; * Fix some parameters loss bug; * * Store intermediate token features and impose no processes on them; * Remove class token and reshape entire token feature from NLC to NCHW; * Fix some doc error * Add a arg for VisionTransformer backbone to control if input class token into transformer; * Add stochastic depth decay rule for DropPath; * * Fix output bug when input_cls_token=False; * Add related unit test; * Re-implement of SETR * Add two head -- SETRUPHead (Naive, PUP) & SETRMLAHead (MLA); * * Modify some docs of heads of SETR; * Add MLA auxiliary head of SETR; * * Modify some arg of setr heads; * Add unit test for setr heads; * * Add 768x768 cityscapes dataset config; * Add Backbone: SETR -- Backbone: MLA, PUP, Naive; * Add SETR cityscapes training & testing config; * * Fix the low code coverage of unit test about heads of setr; * Remove some rebundant error capture; * * Add pascal context dataset & ade20k dataset config; * Modify auxiliary head relative config; * Modify folder structure. * add setr * modify vit * Fix the test_cfg arg position; * Fix some learning schedule bug; * optimize setr code * Add arg: final_reshape to control if converting output feature information from NLC to NCHW; * Fix the default value of final_reshape; * Modify arg: final_reshape to arg: out_shape; * Fix some unit test bug; * Add MLA neck; * Modify setr configs to add MLA neck; * Modify MLA decode head to remove rebundant structure; * Remove some rebundant files. * * Fix the code style bug; * Remove some rebundant files; * Modify some unit tests of SETR; * Ignoring CityscapesCoarseDataset and MapillaryDataset. * Fix the activation function loss bug; * Fix the img_size bug of SETR_PUP_ADE20K * * Fix the lint bug of transformers.py; * Add mla neck unit test; * Convert vit of setr out shape from NLC to NCHW. * * Modify Resize action of data pipeline; * Fix deit related bug; * Set find_unused_parameters=False for pascal context dataset; * Remove arg: find_unused_parameters which is False by default. * Error auxiliary head of PUP deit * Remove the minimal restrict of slide inference. * Modify doc string of Resize * Seperate this part of code to a new PR #544 * * Remove some rebundant codes; * Modify unit tests of SETR heads; * Fix the tuple in_channels of mla_deit. * Modify code style * Move detailed definition of auxiliary head into model config dict; * Add some setr config for default cityscapes.py; * Fix the doc string of SETR head; * Modify implementation of SETR Heads * Remove setr aux head and use fcn head to replace it; * Remove arg: img_size and remove last interpolate op of heads; * Rename arg: conv3x3_conv1x1 to kernel_size of SETRUPHead; * non-square input support for setr heads * Modify config argument for above commits * Remove norm_layer argument of SETRMLAHead * Add mla_align_corners for MLAModule interpolate * [Refactor]Refactor of SETRMLAHead * Modify Head implementation; * Modify Head unit test; * Modify related config file; * [Refactor]MLA Neck * Fix config bug * [Refactor]SETR Naive Head and SETR PUP Head * [Fix]Fix the lack of arg: act_cfg and arg: norm_cfg * Fix config error * Refactor of SETR MLA, Naive, PUP heads. * Modify some attribute name of SETR Heads. * Modify setr configs to adapt new vit code. * Fix trunc_normal_ bug * Parameters init adjustment. * Remove redundant doc string of SETRUPHead * Fix pretrained bug * [Fix] Fix vit init bug * Add some vit unit tests * Modify module import * Remove norm from PatchEmbed * Fix pretrain weights bug * Modify pretrained judge * Fix some gradient backward bugs. * Add some unit tests to improve code cov * Fix init_weights of setr up head * Add DropPath in FFN * Finish benchmark of SETR 1. Add benchmark information into README.MD of SETR; 2. Fix some name bugs of vit; * Remove DropPath implementation and use DropPath from mmcv. * Modify out_indices arg * Fix out_indices bug. * Remove cityscapes base dataset config. Co-authored-by: sennnnn <201730271412@mail.scut.edu.cn> Co-authored-by: CuttlefishXuan <zhaoxinxuan1997@gmail.com>
2021-06-24 00:39:29 +08:00
Default: 'range'
ratio_range (tuple[float]): (min_ratio, max_ratio).
Default: None
2020-07-07 20:52:19 +08:00
keep_ratio (bool): Whether to keep the aspect ratio when resizing the
[Feature] Official implementation of SETR (#531) * Adjust vision transformer backbone architectures; * Add DropPath, trunc_normal_ for VisionTransformer implementation; * Add class token buring intermediate period and remove it during final period; * Fix some parameters loss bug; * * Store intermediate token features and impose no processes on them; * Remove class token and reshape entire token feature from NLC to NCHW; * Fix some doc error * Add a arg for VisionTransformer backbone to control if input class token into transformer; * Add stochastic depth decay rule for DropPath; * * Fix output bug when input_cls_token=False; * Add related unit test; * Re-implement of SETR * Add two head -- SETRUPHead (Naive, PUP) & SETRMLAHead (MLA); * * Modify some docs of heads of SETR; * Add MLA auxiliary head of SETR; * * Modify some arg of setr heads; * Add unit test for setr heads; * * Add 768x768 cityscapes dataset config; * Add Backbone: SETR -- Backbone: MLA, PUP, Naive; * Add SETR cityscapes training & testing config; * * Fix the low code coverage of unit test about heads of setr; * Remove some rebundant error capture; * * Add pascal context dataset & ade20k dataset config; * Modify auxiliary head relative config; * Modify folder structure. * add setr * modify vit * Fix the test_cfg arg position; * Fix some learning schedule bug; * optimize setr code * Add arg: final_reshape to control if converting output feature information from NLC to NCHW; * Fix the default value of final_reshape; * Modify arg: final_reshape to arg: out_shape; * Fix some unit test bug; * Add MLA neck; * Modify setr configs to add MLA neck; * Modify MLA decode head to remove rebundant structure; * Remove some rebundant files. * * Fix the code style bug; * Remove some rebundant files; * Modify some unit tests of SETR; * Ignoring CityscapesCoarseDataset and MapillaryDataset. * Fix the activation function loss bug; * Fix the img_size bug of SETR_PUP_ADE20K * * Fix the lint bug of transformers.py; * Add mla neck unit test; * Convert vit of setr out shape from NLC to NCHW. * * Modify Resize action of data pipeline; * Fix deit related bug; * Set find_unused_parameters=False for pascal context dataset; * Remove arg: find_unused_parameters which is False by default. * Error auxiliary head of PUP deit * Remove the minimal restrict of slide inference. * Modify doc string of Resize * Seperate this part of code to a new PR #544 * * Remove some rebundant codes; * Modify unit tests of SETR heads; * Fix the tuple in_channels of mla_deit. * Modify code style * Move detailed definition of auxiliary head into model config dict; * Add some setr config for default cityscapes.py; * Fix the doc string of SETR head; * Modify implementation of SETR Heads * Remove setr aux head and use fcn head to replace it; * Remove arg: img_size and remove last interpolate op of heads; * Rename arg: conv3x3_conv1x1 to kernel_size of SETRUPHead; * non-square input support for setr heads * Modify config argument for above commits * Remove norm_layer argument of SETRMLAHead * Add mla_align_corners for MLAModule interpolate * [Refactor]Refactor of SETRMLAHead * Modify Head implementation; * Modify Head unit test; * Modify related config file; * [Refactor]MLA Neck * Fix config bug * [Refactor]SETR Naive Head and SETR PUP Head * [Fix]Fix the lack of arg: act_cfg and arg: norm_cfg * Fix config error * Refactor of SETR MLA, Naive, PUP heads. * Modify some attribute name of SETR Heads. * Modify setr configs to adapt new vit code. * Fix trunc_normal_ bug * Parameters init adjustment. * Remove redundant doc string of SETRUPHead * Fix pretrained bug * [Fix] Fix vit init bug * Add some vit unit tests * Modify module import * Remove norm from PatchEmbed * Fix pretrain weights bug * Modify pretrained judge * Fix some gradient backward bugs. * Add some unit tests to improve code cov * Fix init_weights of setr up head * Add DropPath in FFN * Finish benchmark of SETR 1. Add benchmark information into README.MD of SETR; 2. Fix some name bugs of vit; * Remove DropPath implementation and use DropPath from mmcv. * Modify out_indices arg * Fix out_indices bug. * Remove cityscapes base dataset config. Co-authored-by: sennnnn <201730271412@mail.scut.edu.cn> Co-authored-by: CuttlefishXuan <zhaoxinxuan1997@gmail.com>
2021-06-24 00:39:29 +08:00
image. Default: True
2020-07-07 20:52:19 +08:00
"""
def __init__(self,
img_scale=None,
multiscale_mode='range',
ratio_range=None,
keep_ratio=True):
if img_scale is None:
self.img_scale = None
else:
if isinstance(img_scale, list):
self.img_scale = img_scale
else:
self.img_scale = [img_scale]
assert mmcv.is_list_of(self.img_scale, tuple)
if ratio_range is not None:
# mode 1: given img_scale=None and a range of image ratio
# mode 2: given a scale and a range of image ratio
assert self.img_scale is None or len(self.img_scale) == 1
2020-07-07 20:52:19 +08:00
else:
# mode 3 and 4: given multiple scales or a range of scales
2020-07-07 20:52:19 +08:00
assert multiscale_mode in ['value', 'range']
self.multiscale_mode = multiscale_mode
self.ratio_range = ratio_range
self.keep_ratio = keep_ratio
@staticmethod
def random_select(img_scales):
"""Randomly select an img_scale from given candidates.
Args:
img_scales (list[tuple]): Images scales for selection.
Returns:
(tuple, int): Returns a tuple ``(img_scale, scale_dix)``,
where ``img_scale`` is the selected image scale and
``scale_idx`` is the selected index in the given candidates.
"""
assert mmcv.is_list_of(img_scales, tuple)
scale_idx = np.random.randint(len(img_scales))
img_scale = img_scales[scale_idx]
return img_scale, scale_idx
@staticmethod
def random_sample(img_scales):
"""Randomly sample an img_scale when ``multiscale_mode=='range'``.
Args:
img_scales (list[tuple]): Images scale range for sampling.
There must be two tuples in img_scales, which specify the lower
and upper bound of image scales.
2020-07-07 20:52:19 +08:00
Returns:
(tuple, None): Returns a tuple ``(img_scale, None)``, where
``img_scale`` is sampled scale and None is just a placeholder
to be consistent with :func:`random_select`.
"""
assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
img_scale_long = [max(s) for s in img_scales]
img_scale_short = [min(s) for s in img_scales]
long_edge = np.random.randint(
min(img_scale_long),
max(img_scale_long) + 1)
short_edge = np.random.randint(
min(img_scale_short),
max(img_scale_short) + 1)
img_scale = (long_edge, short_edge)
return img_scale, None
@staticmethod
def random_sample_ratio(img_scale, ratio_range):
"""Randomly sample an img_scale when ``ratio_range`` is specified.
A ratio will be randomly sampled from the range specified by
``ratio_range``. Then it would be multiplied with ``img_scale`` to
generate sampled scale.
Args:
img_scale (tuple): Images scale base to multiply with ratio.
ratio_range (tuple[float]): The minimum and maximum ratio to scale
the ``img_scale``.
Returns:
(tuple, None): Returns a tuple ``(scale, None)``, where
``scale`` is sampled ratio multiplied with ``img_scale`` and
None is just a placeholder to be consistent with
:func:`random_select`.
"""
assert isinstance(img_scale, tuple) and len(img_scale) == 2
min_ratio, max_ratio = ratio_range
assert min_ratio <= max_ratio
ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
return scale, None
def _random_scale(self, results):
"""Randomly sample an img_scale according to ``ratio_range`` and
``multiscale_mode``.
If ``ratio_range`` is specified, a ratio will be sampled and be
multiplied with ``img_scale``.
If multiple scales are specified by ``img_scale``, a scale will be
sampled according to ``multiscale_mode``.
Otherwise, single scale will be used.
Args:
results (dict): Result dict from :obj:`dataset`.
Returns:
dict: Two new keys 'scale` and 'scale_idx` are added into
``results``, which would be used by subsequent pipelines.
"""
if self.ratio_range is not None:
if self.img_scale is None:
h, w = results['img'].shape[:2]
scale, scale_idx = self.random_sample_ratio((w, h),
self.ratio_range)
else:
scale, scale_idx = self.random_sample_ratio(
self.img_scale[0], self.ratio_range)
2020-07-07 20:52:19 +08:00
elif len(self.img_scale) == 1:
scale, scale_idx = self.img_scale[0], 0
elif self.multiscale_mode == 'range':
scale, scale_idx = self.random_sample(self.img_scale)
elif self.multiscale_mode == 'value':
scale, scale_idx = self.random_select(self.img_scale)
else:
raise NotImplementedError
results['scale'] = scale
results['scale_idx'] = scale_idx
def _resize_img(self, results):
"""Resize images with ``results['scale']``."""
if self.keep_ratio:
img, scale_factor = mmcv.imrescale(
results['img'], results['scale'], return_scale=True)
# the w_scale and h_scale has minor difference
# a real fix should be done in the mmcv.imrescale in the future
new_h, new_w = img.shape[:2]
h, w = results['img'].shape[:2]
w_scale = new_w / w
h_scale = new_h / h
else:
img, w_scale, h_scale = mmcv.imresize(
results['img'], results['scale'], return_scale=True)
scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
dtype=np.float32)
results['img'] = img
results['img_shape'] = img.shape
results['pad_shape'] = img.shape # in case that there is no padding
results['scale_factor'] = scale_factor
results['keep_ratio'] = self.keep_ratio
def _resize_seg(self, results):
"""Resize semantic segmentation map with ``results['scale']``."""
for key in results.get('seg_fields', []):
if self.keep_ratio:
gt_seg = mmcv.imrescale(
results[key], results['scale'], interpolation='nearest')
else:
gt_seg = mmcv.imresize(
results[key], results['scale'], interpolation='nearest')
results[key] = gt_seg
2020-07-07 20:52:19 +08:00
def __call__(self, results):
"""Call function to resize images, bounding boxes, masks, semantic
segmentation map.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor',
'keep_ratio' keys are added into result dict.
"""
if 'scale' not in results:
self._random_scale(results)
self._resize_img(results)
self._resize_seg(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += (f'(img_scale={self.img_scale}, '
f'multiscale_mode={self.multiscale_mode}, '
f'ratio_range={self.ratio_range}, '
f'keep_ratio={self.keep_ratio})')
return repr_str
@PIPELINES.register_module()
class RandomFlip(object):
"""Flip the image & seg.
If the input dict contains the key "flip", then the flag will be used,
otherwise it will be randomly decided by a ratio specified in the init
method.
Args:
prob (float, optional): The flipping probability. Default: None.
2020-07-07 20:52:19 +08:00
direction(str, optional): The flipping direction. Options are
'horizontal' and 'vertical'. Default: 'horizontal'.
"""
@deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip')
def __init__(self, prob=None, direction='horizontal'):
self.prob = prob
2020-07-07 20:52:19 +08:00
self.direction = direction
if prob is not None:
assert prob >= 0 and prob <= 1
2020-07-07 20:52:19 +08:00
assert direction in ['horizontal', 'vertical']
def __call__(self, results):
"""Call function to flip bounding boxes, masks, semantic segmentation
maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Flipped results, 'flip', 'flip_direction' keys are added into
result dict.
"""
if 'flip' not in results:
flip = True if np.random.rand() < self.prob else False
2020-07-07 20:52:19 +08:00
results['flip'] = flip
if 'flip_direction' not in results:
results['flip_direction'] = self.direction
if results['flip']:
# flip image
results['img'] = mmcv.imflip(
results['img'], direction=results['flip_direction'])
# flip segs
for key in results.get('seg_fields', []):
# use copy() to make numpy stride positive
results[key] = mmcv.imflip(
results[key], direction=results['flip_direction']).copy()
return results
def __repr__(self):
return self.__class__.__name__ + f'(prob={self.prob})'
2020-07-07 20:52:19 +08:00
@PIPELINES.register_module()
class Pad(object):
"""Pad the image & mask.
There are two padding modes: (1) pad to a fixed size and (2) pad to the
minimum size that is divisible by some number.
Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor",
Args:
size (tuple, optional): Fixed padding size.
size_divisor (int, optional): The divisor of padded size.
pad_val (float, optional): Padding value. Default: 0.
seg_pad_val (float, optional): Padding value of segmentation map.
Default: 255.
"""
def __init__(self,
size=None,
size_divisor=None,
pad_val=0,
seg_pad_val=255):
self.size = size
self.size_divisor = size_divisor
self.pad_val = pad_val
self.seg_pad_val = seg_pad_val
# only one of size and size_divisor should be valid
assert size is not None or size_divisor is not None
assert size is None or size_divisor is None
def _pad_img(self, results):
"""Pad images according to ``self.size``."""
if self.size is not None:
padded_img = mmcv.impad(
results['img'], shape=self.size, pad_val=self.pad_val)
elif self.size_divisor is not None:
padded_img = mmcv.impad_to_multiple(
results['img'], self.size_divisor, pad_val=self.pad_val)
results['img'] = padded_img
results['pad_shape'] = padded_img.shape
results['pad_fixed_size'] = self.size
results['pad_size_divisor'] = self.size_divisor
def _pad_seg(self, results):
"""Pad masks according to ``results['pad_shape']``."""
for key in results.get('seg_fields', []):
results[key] = mmcv.impad(
results[key],
shape=results['pad_shape'][:2],
pad_val=self.seg_pad_val)
def __call__(self, results):
"""Call function to pad images, masks, semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Updated result dict.
"""
self._pad_img(results)
self._pad_seg(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' \
f'pad_val={self.pad_val})'
return repr_str
@PIPELINES.register_module()
class Normalize(object):
"""Normalize the image.
Added key is "img_norm_cfg".
Args:
mean (sequence): Mean values of 3 channels.
std (sequence): Std values of 3 channels.
to_rgb (bool): Whether to convert the image from BGR to RGB,
default is true.
"""
def __init__(self, mean, std, to_rgb=True):
self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32)
self.to_rgb = to_rgb
def __call__(self, results):
"""Call function to normalize images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Normalized results, 'img_norm_cfg' key is added into
result dict.
"""
results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std,
self.to_rgb)
results['img_norm_cfg'] = dict(
mean=self.mean, std=self.std, to_rgb=self.to_rgb)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' \
f'{self.to_rgb})'
return repr_str
@PIPELINES.register_module()
class Rerange(object):
"""Rerange the image pixel value.
Args:
min_value (float or int): Minimum value of the reranged image.
Default: 0.
max_value (float or int): Maximum value of the reranged image.
Default: 255.
"""
def __init__(self, min_value=0, max_value=255):
assert isinstance(min_value, float) or isinstance(min_value, int)
assert isinstance(max_value, float) or isinstance(max_value, int)
assert min_value < max_value
self.min_value = min_value
self.max_value = max_value
def __call__(self, results):
"""Call function to rerange images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Reranged results.
"""
img = results['img']
img_min_value = np.min(img)
img_max_value = np.max(img)
assert img_min_value < img_max_value
# rerange to [0, 1]
img = (img - img_min_value) / (img_max_value - img_min_value)
# rerange to [min_value, max_value]
img = img * (self.max_value - self.min_value) + self.min_value
results['img'] = img
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(min_value={self.min_value}, max_value={self.max_value})'
return repr_str
@PIPELINES.register_module()
class CLAHE(object):
"""Use CLAHE method to process the image.
See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J].
Graphics Gems, 1994:474-485.` for more information.
Args:
clip_limit (float): Threshold for contrast limiting. Default: 40.0.
tile_grid_size (tuple[int]): Size of grid for histogram equalization.
Input image will be divided into equally sized rectangular tiles.
It defines the number of tiles in row and column. Default: (8, 8).
"""
def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)):
assert isinstance(clip_limit, (float, int))
self.clip_limit = clip_limit
assert is_tuple_of(tile_grid_size, int)
assert len(tile_grid_size) == 2
self.tile_grid_size = tile_grid_size
def __call__(self, results):
"""Call function to Use CLAHE method process images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Processed results.
"""
for i in range(results['img'].shape[2]):
results['img'][:, :, i] = mmcv.clahe(
np.array(results['img'][:, :, i], dtype=np.uint8),
self.clip_limit, self.tile_grid_size)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(clip_limit={self.clip_limit}, '\
f'tile_grid_size={self.tile_grid_size})'
return repr_str
2020-07-07 20:52:19 +08:00
@PIPELINES.register_module()
class RandomCrop(object):
"""Random crop the image & seg.
Args:
crop_size (tuple): Expected size after cropping, (h, w).
cat_max_ratio (float): The maximum ratio that single category could
occupy.
"""
def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255):
assert crop_size[0] > 0 and crop_size[1] > 0
self.crop_size = crop_size
self.cat_max_ratio = cat_max_ratio
self.ignore_index = ignore_index
def get_crop_bbox(self, img):
"""Randomly get a crop bounding box."""
margin_h = max(img.shape[0] - self.crop_size[0], 0)
margin_w = max(img.shape[1] - self.crop_size[1], 0)
offset_h = np.random.randint(0, margin_h + 1)
offset_w = np.random.randint(0, margin_w + 1)
crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0]
crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1]
return crop_y1, crop_y2, crop_x1, crop_x2
def crop(self, img, crop_bbox):
"""Crop from ``img``"""
crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox
img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
return img
def __call__(self, results):
"""Call function to randomly crop images, semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Randomly cropped results, 'img_shape' key in result dict is
updated according to crop size.
"""
img = results['img']
crop_bbox = self.get_crop_bbox(img)
if self.cat_max_ratio < 1.:
# Repeat 10 times
for _ in range(10):
seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox)
labels, cnt = np.unique(seg_temp, return_counts=True)
cnt = cnt[labels != self.ignore_index]
if len(cnt) > 1 and np.max(cnt) / np.sum(
cnt) < self.cat_max_ratio:
break
crop_bbox = self.get_crop_bbox(img)
# crop the image
img = self.crop(img, crop_bbox)
img_shape = img.shape
results['img'] = img
results['img_shape'] = img_shape
# crop semantic seg
for key in results.get('seg_fields', []):
results[key] = self.crop(results[key], crop_bbox)
return results
def __repr__(self):
return self.__class__.__name__ + f'(crop_size={self.crop_size})'
@PIPELINES.register_module()
class RandomRotate(object):
"""Rotate the image & seg.
Args:
prob (float): The rotation probability.
degree (float, tuple[float]): Range of degrees to select from. If
degree is a number instead of tuple like (min, max),
the range of degree will be (``-degree``, ``+degree``)
pad_val (float, optional): Padding value of image. Default: 0.
seg_pad_val (float, optional): Padding value of segmentation map.
Default: 255.
center (tuple[float], optional): Center point (w, h) of the rotation in
the source image. If not specified, the center of the image will be
used. Default: None.
auto_bound (bool): Whether to adjust the image size to cover the whole
rotated image. Default: False
"""
def __init__(self,
prob,
degree,
pad_val=0,
seg_pad_val=255,
center=None,
auto_bound=False):
self.prob = prob
assert prob >= 0 and prob <= 1
if isinstance(degree, (float, int)):
assert degree > 0, f'degree {degree} should be positive'
self.degree = (-degree, degree)
else:
self.degree = degree
assert len(self.degree) == 2, f'degree {self.degree} should be a ' \
f'tuple of (min, max)'
self.pal_val = pad_val
self.seg_pad_val = seg_pad_val
self.center = center
self.auto_bound = auto_bound
def __call__(self, results):
"""Call function to rotate image, semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Rotated results.
"""
rotate = True if np.random.rand() < self.prob else False
degree = np.random.uniform(min(*self.degree), max(*self.degree))
if rotate:
# rotate image
results['img'] = mmcv.imrotate(
results['img'],
angle=degree,
border_value=self.pal_val,
center=self.center,
auto_bound=self.auto_bound)
# rotate segs
for key in results.get('seg_fields', []):
results[key] = mmcv.imrotate(
results[key],
angle=degree,
border_value=self.seg_pad_val,
center=self.center,
auto_bound=self.auto_bound,
interpolation='nearest')
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(prob={self.prob}, ' \
f'degree={self.degree}, ' \
f'pad_val={self.pal_val}, ' \
f'seg_pad_val={self.seg_pad_val}, ' \
f'center={self.center}, ' \
f'auto_bound={self.auto_bound})'
return repr_str
@PIPELINES.register_module()
class RGB2Gray(object):
"""Convert RGB image to grayscale image.
This transform calculate the weighted mean of input image channels with
``weights`` and then expand the channels to ``out_channels``. When
``out_channels`` is None, the number of output channels is the same as
input channels.
Args:
out_channels (int): Expected number of output channels after
transforming. Default: None.
weights (tuple[float]): The weights to calculate the weighted mean.
Default: (0.299, 0.587, 0.114).
"""
def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)):
assert out_channels is None or out_channels > 0
self.out_channels = out_channels
assert isinstance(weights, tuple)
for item in weights:
assert isinstance(item, (float, int))
self.weights = weights
def __call__(self, results):
"""Call function to convert RGB image to grayscale image.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with grayscale image.
"""
img = results['img']
assert len(img.shape) == 3
assert img.shape[2] == len(self.weights)
weights = np.array(self.weights).reshape((1, 1, -1))
img = (img * weights).sum(2, keepdims=True)
if self.out_channels is None:
img = img.repeat(weights.shape[2], axis=2)
else:
img = img.repeat(self.out_channels, axis=2)
results['img'] = img
results['img_shape'] = img.shape
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(out_channels={self.out_channels}, ' \
f'weights={self.weights})'
return repr_str
@PIPELINES.register_module()
class AdjustGamma(object):
"""Using gamma correction to process the image.
Args:
gamma (float or int): Gamma value used in gamma correction.
Default: 1.0.
"""
def __init__(self, gamma=1.0):
assert isinstance(gamma, float) or isinstance(gamma, int)
assert gamma > 0
self.gamma = gamma
inv_gamma = 1.0 / gamma
self.table = np.array([(i / 255.0)**inv_gamma * 255
for i in np.arange(256)]).astype('uint8')
def __call__(self, results):
"""Call function to process the image with gamma correction.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Processed results.
"""
results['img'] = mmcv.lut_transform(
np.array(results['img'], dtype=np.uint8), self.table)
return results
def __repr__(self):
return self.__class__.__name__ + f'(gamma={self.gamma})'
2020-07-07 20:52:19 +08:00
@PIPELINES.register_module()
class SegRescale(object):
"""Rescale semantic segmentation maps.
Args:
scale_factor (float): The scale factor of the final output.
"""
def __init__(self, scale_factor=1):
self.scale_factor = scale_factor
def __call__(self, results):
"""Call function to scale the semantic segmentation map.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with semantic segmentation map scaled.
"""
for key in results.get('seg_fields', []):
if self.scale_factor != 1:
results[key] = mmcv.imrescale(
results[key], self.scale_factor, interpolation='nearest')
return results
def __repr__(self):
return self.__class__.__name__ + f'(scale_factor={self.scale_factor})'
@PIPELINES.register_module()
class PhotoMetricDistortion(object):
"""Apply photometric distortion to image sequentially, every transformation
is applied with a probability of 0.5. The position of random contrast is in
second or second to last.
1. random brightness
2. random contrast (mode 0)
3. convert color from BGR to HSV
4. random saturation
5. random hue
6. convert color from HSV to BGR
7. random contrast (mode 1)
Args:
brightness_delta (int): delta of brightness.
contrast_range (tuple): range of contrast.
saturation_range (tuple): range of saturation.
hue_delta (int): delta of hue.
"""
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta
def convert(self, img, alpha=1, beta=0):
"""Multiple with alpha and add beat with clip."""
img = img.astype(np.float32) * alpha + beta
img = np.clip(img, 0, 255)
return img.astype(np.uint8)
def brightness(self, img):
"""Brightness distortion."""
if random.randint(2):
return self.convert(
img,
beta=random.uniform(-self.brightness_delta,
self.brightness_delta))
return img
def contrast(self, img):
"""Contrast distortion."""
if random.randint(2):
return self.convert(
img,
alpha=random.uniform(self.contrast_lower, self.contrast_upper))
return img
def saturation(self, img):
"""Saturation distortion."""
if random.randint(2):
img = mmcv.bgr2hsv(img)
img[:, :, 1] = self.convert(
img[:, :, 1],
alpha=random.uniform(self.saturation_lower,
self.saturation_upper))
img = mmcv.hsv2bgr(img)
return img
def hue(self, img):
"""Hue distortion."""
if random.randint(2):
img = mmcv.bgr2hsv(img)
img[:, :,
0] = (img[:, :, 0].astype(int) +
random.randint(-self.hue_delta, self.hue_delta)) % 180
img = mmcv.hsv2bgr(img)
return img
def __call__(self, results):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
img = results['img']
# random brightness
img = self.brightness(img)
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode = random.randint(2)
if mode == 1:
img = self.contrast(img)
# random saturation
img = self.saturation(img)
# random hue
img = self.hue(img)
# random contrast
if mode == 0:
img = self.contrast(img)
results['img'] = img
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += (f'(brightness_delta={self.brightness_delta}, '
f'contrast_range=({self.contrast_lower}, '
f'{self.contrast_upper}), '
f'saturation_range=({self.saturation_lower}, '
f'{self.saturation_upper}), '
f'hue_delta={self.hue_delta})')
return repr_str
@PIPELINES.register_module()
class RandomCutOut(object):
"""CutOut operation.
Randomly drop some regions of image used in
`Cutout <https://arxiv.org/abs/1708.04552>`_.
Args:
prob (float): cutout probability.
n_holes (int | tuple[int, int]): Number of regions to be dropped.
If it is given as a list, number of holes will be randomly
selected from the closed interval [`n_holes[0]`, `n_holes[1]`].
cutout_shape (tuple[int, int] | list[tuple[int, int]]): The candidate
shape of dropped regions. It can be `tuple[int, int]` to use a
fixed cutout shape, or `list[tuple[int, int]]` to randomly choose
shape from the list.
cutout_ratio (tuple[float, float] | list[tuple[float, float]]): The
candidate ratio of dropped regions. It can be `tuple[float, float]`
to use a fixed ratio or `list[tuple[float, float]]` to randomly
choose ratio from the list. Please note that `cutout_shape`
and `cutout_ratio` cannot be both given at the same time.
fill_in (tuple[float, float, float] | tuple[int, int, int]): The value
of pixel to fill in the dropped regions. Default: (0, 0, 0).
seg_fill_in (int): The labels of pixel to fill in the dropped regions.
If seg_fill_in is None, skip. Default: None.
"""
def __init__(self,
prob,
n_holes,
cutout_shape=None,
cutout_ratio=None,
fill_in=(0, 0, 0),
seg_fill_in=None):
assert 0 <= prob and prob <= 1
assert (cutout_shape is None) ^ (cutout_ratio is None), \
'Either cutout_shape or cutout_ratio should be specified.'
assert (isinstance(cutout_shape, (list, tuple))
or isinstance(cutout_ratio, (list, tuple)))
if isinstance(n_holes, tuple):
assert len(n_holes) == 2 and 0 <= n_holes[0] < n_holes[1]
else:
n_holes = (n_holes, n_holes)
if seg_fill_in is not None:
assert (isinstance(seg_fill_in, int) and 0 <= seg_fill_in
and seg_fill_in <= 255)
self.prob = prob
self.n_holes = n_holes
self.fill_in = fill_in
self.seg_fill_in = seg_fill_in
self.with_ratio = cutout_ratio is not None
self.candidates = cutout_ratio if self.with_ratio else cutout_shape
if not isinstance(self.candidates, list):
self.candidates = [self.candidates]
def __call__(self, results):
"""Call function to drop some regions of image."""
cutout = True if np.random.rand() < self.prob else False
if cutout:
h, w, c = results['img'].shape
n_holes = np.random.randint(self.n_holes[0], self.n_holes[1] + 1)
for _ in range(n_holes):
x1 = np.random.randint(0, w)
y1 = np.random.randint(0, h)
index = np.random.randint(0, len(self.candidates))
if not self.with_ratio:
cutout_w, cutout_h = self.candidates[index]
else:
cutout_w = int(self.candidates[index][0] * w)
cutout_h = int(self.candidates[index][1] * h)
x2 = np.clip(x1 + cutout_w, 0, w)
y2 = np.clip(y1 + cutout_h, 0, h)
results['img'][y1:y2, x1:x2, :] = self.fill_in
if self.seg_fill_in is not None:
for key in results.get('seg_fields', []):
results[key][y1:y2, x1:x2] = self.seg_fill_in
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(prob={self.prob}, '
repr_str += f'n_holes={self.n_holes}, '
repr_str += (f'cutout_ratio={self.candidates}, ' if self.with_ratio
else f'cutout_shape={self.candidates}, ')
repr_str += f'fill_in={self.fill_in}, '
repr_str += f'seg_fill_in={self.seg_fill_in})'
return repr_str