Merge branch 'zhengmiao/randomcrop' into 'refactor_dev'

[Enhancement] Revise RandomCrop

See merge request openmmlab-enterprise/openmmlab-ce/mmsegmentation!20
This commit is contained in:
zhengmiao 2022-05-28 08:04:57 +00:00
commit 8ffd9e44d7
22 changed files with 27 additions and 290 deletions

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -9,20 +9,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -6,20 +6,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -6,20 +6,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -6,20 +6,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -6,20 +6,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -9,20 +9,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -9,20 +9,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -16,20 +16,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -11,20 +11,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -11,20 +11,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -9,20 +9,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -8,20 +8,7 @@ train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
dict(
type='TransformBroadcaster',
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True,
transforms=[
dict(
type='mmseg.RandomCrop',
crop_size=crop_size,
cat_max_ratio=0.75),
]),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Normalize', **img_norm_cfg),

View File

@ -237,7 +237,7 @@ class RandomCrop(BaseTransform):
if self.cat_max_ratio < 1.:
# Repeat 10 times
for _ in range(10):
seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox)
seg_temp = self.crop(results['gt_seg_map'], crop_bbox)
labels, cnt = np.unique(seg_temp, return_counts=True)
cnt = cnt[labels != self.ignore_index]
if len(cnt) > 1 and np.max(cnt) / np.sum(
@ -279,6 +279,10 @@ class RandomCrop(BaseTransform):
# crop the image
img = self.crop(img, crop_bbox)
# crop semantic seg
for key in results.get('seg_fields', []):
results[key] = self.crop(results[key], crop_bbox)
img_shape = img.shape
results['img'] = img
results['img_shape'] = img_shape

View File

@ -4,7 +4,6 @@ import os.path as osp
import mmcv
import numpy as np
import pytest
from mmcv.transforms.wrappers import TransformBroadcaster
from PIL import Image
from mmseg.datasets.pipelines import PhotoMetricDistortion, RandomCrop
@ -28,14 +27,8 @@ def test_random_crop():
results['scale_factor'] = 1.0
h, w, _ = img.shape
pipeline = TransformBroadcaster(
transforms=[RandomCrop(crop_size=(h - 20, w - 20))],
mapping={
'img': ['img', 'gt_semantic_seg'],
'img_shape': [..., 'img_shape']
},
auto_remap=True,
share_random_params=True)
pipeline = RandomCrop(crop_size=(h - 20, w - 20))
results = pipeline(results)
assert results['img'].shape[:2] == (h - 20, w - 20)
assert results['img_shape'][:2] == (h - 20, w - 20)