Merge pull request #21 from yl-1993/dev/resize_backend

Add PIL backend for resize transform
pull/22/head
Lei Yang 2020-08-07 21:50:51 +08:00 committed by GitHub
commit 3e2d3d726a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 133 additions and 11 deletions

View File

@ -0,0 +1,41 @@
# Dataset settings: ImageNet with 32 samples per GPU. The resize-type
# transforms in both pipelines run on the `pillow` backend.
dataset_type = 'ImageNet'

# Per-channel statistics that are unpacked into every `Normalize` step.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}

# Training: random resized crop to 224 plus a horizontal flip applied with
# probability 0.5, then normalization and packing of image and label.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'RandomResizedCrop', 'size': 224, 'backend': 'pillow'},
    {'type': 'RandomFlip', 'flip_prob': 0.5, 'direction': 'horizontal'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'ToTensor', 'keys': ['gt_label']},
    {'type': 'Collect', 'keys': ['img', 'gt_label']},
]

# Evaluation: resize to 256, center crop to 224, then the same
# normalization and packing steps as training.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'Resize', 'size': 256, 'backend': 'pillow'},
    {'type': 'CenterCrop', 'crop_size': 224},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'ToTensor', 'keys': ['gt_label']},
    {'type': 'Collect', 'keys': ['img', 'gt_label']},
]

# The `val` and `test` splits point at the same images, annotation file and
# pipeline object; only `train` uses the augmenting pipeline.
data = {
    'samples_per_gpu': 32,
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'data_prefix': 'data/imagenet/train',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'data_prefix': 'data/imagenet/val',
        'ann_file': 'data/imagenet/meta/val.txt',
        'pipeline': test_pipeline,
    },
    'test': {
        # replace `data/val` with `data/test` for standard test
        'type': dataset_type,
        'data_prefix': 'data/imagenet/val',
        'ann_file': 'data/imagenet/meta/val.txt',
        'pipeline': test_pipeline,
    },
}

# Evaluate accuracy at an interval of 1.
evaluation = {'interval': 1, 'metric': 'accuracy'}

View File

@ -0,0 +1,41 @@
# Dataset settings: ImageNet with 64 samples per GPU. The resize-type
# transforms in both pipelines run on the `pillow` backend.
dataset_type = 'ImageNet'

# Per-channel statistics that are unpacked into every `Normalize` step.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}

# Training: random resized crop to 224 plus a horizontal flip applied with
# probability 0.5, then normalization and packing of image and label.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'RandomResizedCrop', 'size': 224, 'backend': 'pillow'},
    {'type': 'RandomFlip', 'flip_prob': 0.5, 'direction': 'horizontal'},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'ToTensor', 'keys': ['gt_label']},
    {'type': 'Collect', 'keys': ['img', 'gt_label']},
]

# Evaluation: resize to 256, center crop to 224, then the same
# normalization and packing steps as training.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'Resize', 'size': 256, 'backend': 'pillow'},
    {'type': 'CenterCrop', 'crop_size': 224},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    {'type': 'ToTensor', 'keys': ['gt_label']},
    {'type': 'Collect', 'keys': ['img', 'gt_label']},
]

# The `val` and `test` splits point at the same images, annotation file and
# pipeline object; only `train` uses the augmenting pipeline.
data = {
    'samples_per_gpu': 64,
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'data_prefix': 'data/imagenet/train',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'data_prefix': 'data/imagenet/val',
        'ann_file': 'data/imagenet/meta/val.txt',
        'pipeline': test_pipeline,
    },
    'test': {
        # replace `data/val` with `data/test` for standard test
        'type': dataset_type,
        'data_prefix': 'data/imagenet/val',
        'ann_file': 'data/imagenet/meta/val.txt',
        'pipeline': test_pipeline,
    },
}

# Evaluate accuracy at an interval of 1.
evaluation = {'interval': 1, 'metric': 'accuracy'}

View File

@ -1,6 +1,6 @@
_base_ = [
'../_base_/models/mobilenet_v2_1x.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256_epochstep.py',
'../_base_/default_runtime.py'
]

View File

@ -1,4 +1,5 @@
_base_ = [
'../_base_/models/seresnet101.py', '../_base_/datasets/imagenet_bs32.py',
'../_base_/models/seresnet101.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
]

View File

@ -1,5 +1,6 @@
_base_ = [
'../_base_/models/seresnet50.py', '../_base_/datasets/imagenet_bs32.py',
'../_base_/models/seresnet50.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256_140e.py',
'../_base_/default_runtime.py'
]

View File

@ -1,5 +1,5 @@
_base_ = [
'../_base_/models/seresnext101_32x4d.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
]

View File

@ -1,5 +1,5 @@
_base_ = [
'../_base_/models/seresnext50_32x4d.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
]

View File

@ -1,6 +1,6 @@
_base_ = [
'../_base_/models/shufflenet_v1_1x.py',
'../_base_/datasets/imagenet_bs64.py',
'../_base_/datasets/imagenet_bs64_pil_resize.py',
'../_base_/schedules/imagenet_bs1024_lineardecay.py',
'../_base_/default_runtime.py'
]

View File

@ -1,6 +1,6 @@
_base_ = [
'../_base_/models/shufflenet_v2_1x.py',
'../_base_/datasets/imagenet_bs64.py',
'../_base_/datasets/imagenet_bs64_pil_resize.py',
'../_base_/schedules/imagenet_bs2048_lineardecay.py',
'../_base_/default_runtime.py'
]

View File

@ -140,13 +140,16 @@ class RandomResizedCrop(object):
interpolation (str): Interpolation method, accepted values are
'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Default:
'bilinear'.
backend (str): The image resize backend type, accepted values are
`cv2` and `pillow`. Default: `cv2`.
"""
def __init__(self,
size,
scale=(0.08, 1.0),
ratio=(3. / 4., 4. / 3.),
interpolation='bilinear'):
interpolation='bilinear',
backend='cv2'):
if isinstance(size, (tuple, list)):
self.size = size
else:
@ -154,10 +157,14 @@ class RandomResizedCrop(object):
if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
raise ValueError("range should be of kind (min, max). "
f"But received {scale}")
if backend not in ['cv2', 'pillow']:
raise ValueError(f'backend: {backend} is not supported for resize.'
'Supported backends are "cv2", "pillow"')
self.interpolation = interpolation
self.scale = scale
self.ratio = ratio
self.backend = backend
@staticmethod
def get_params(img, scale, ratio):
@ -225,7 +232,10 @@ class RandomResizedCrop(object):
xmin + target_height - 1
]))
results[key] = mmcv.imresize(
img, tuple(self.size[::-1]), interpolation=self.interpolation)
img,
tuple(self.size[::-1]),
interpolation=self.interpolation,
backend=self.backend)
return results
def __repr__(self):
@ -331,9 +341,11 @@ class Resize(object):
interpolation (str): Interpolation method, accepted values are
"nearest", "bilinear", "bicubic", "area", "lanczos".
More details can be found in `mmcv.image.geometric`.
backend (str): The image resize backend type, accepted values are
`cv2` and `pillow`. Default: `cv2`.
"""
def __init__(self, size, interpolation='bilinear'):
def __init__(self, size, interpolation='bilinear', backend='cv2'):
assert isinstance(size, int) or (isinstance(size, tuple)
and len(size) == 2)
if isinstance(size, int):
@ -341,11 +353,15 @@ class Resize(object):
assert size[0] > 0 and size[1] > 0
assert interpolation in ("nearest", "bilinear", "bicubic", "area",
"lanczos")
if backend not in ['cv2', 'pillow']:
raise ValueError(f'backend: {backend} is not supported for resize.'
'Supported backends are "cv2", "pillow"')
self.height = size[0]
self.width = size[1]
self.size = size
self.interpolation = interpolation
self.backend = backend
def _resize_img(self, results):
for key in results.get('img_fields', ['img']):
@ -353,7 +369,8 @@ class Resize(object):
results[key],
size=(self.width, self.height),
interpolation=self.interpolation,
return_scale=False)
return_scale=False,
backend=self.backend)
results[key] = img
results['img_shape'] = img.shape

View File

@ -64,6 +64,7 @@ def test_resize():
results['img2'] = copy.deepcopy(original_img)
results['img_shape'] = original_img.shape
results['ori_shape'] = original_img.shape
results['img_fields'] = ['img', 'img2']
return results
# test resize when size is int
@ -101,6 +102,26 @@ def test_resize():
assert np.equal(results['img'], results['img2']).all()
assert results['img_shape'] == (img_height * 2, img_width * 2, 3)
# test resize with different backends
transform_cv2 = dict(
type='Resize',
size=(224, 256),
interpolation='bilinear',
backend='cv2')
transform_pil = dict(
type='Resize',
size=(224, 256),
interpolation='bilinear',
backend='pillow')
resize_module_cv2 = build_from_cfg(transform_cv2, PIPELINES)
resize_module_pil = build_from_cfg(transform_pil, PIPELINES)
results = reset_results(results, original_img)
results['img_fields'] = ['img']
results_cv2 = resize_module_cv2(results)
results['img_fields'] = ['img2']
results_pil = resize_module_pil(results)
assert np.allclose(results_cv2['img'], results_pil['img2'], atol=45)
# compare results with torchvision
transform = dict(type='Resize', size=(224, 224), interpolation='area')
resize_module = build_from_cfg(transform, PIPELINES)