Merge pull request #21 from yl-1993/dev/resize_backend
Add PIL backend for resize transform
pull/22/head
commit
3e2d3d726a
|
@ -0,0 +1,41 @@
|
|||
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='RandomResizedCrop', size=224, backend='pillow'),
|
||||
dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='ToTensor', keys=['gt_label']),
|
||||
dict(type='Collect', keys=['img', 'gt_label'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', size=256, backend='pillow'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='ToTensor', keys=['gt_label']),
|
||||
dict(type='Collect', keys=['img', 'gt_label'])
|
||||
]
|
||||
data = dict(
|
||||
samples_per_gpu=32,
|
||||
workers_per_gpu=2,
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='data/imagenet/train',
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='data/imagenet/val',
|
||||
ann_file='data/imagenet/meta/val.txt',
|
||||
pipeline=test_pipeline),
|
||||
test=dict(
|
||||
# replace `data/imagenet/val` with `data/imagenet/test` for standard test
|
||||
type=dataset_type,
|
||||
data_prefix='data/imagenet/val',
|
||||
ann_file='data/imagenet/meta/val.txt',
|
||||
pipeline=test_pipeline))
|
||||
evaluation = dict(interval=1, metric='accuracy')
|
|
@ -0,0 +1,41 @@
|
|||
# dataset settings
|
||||
dataset_type = 'ImageNet'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='RandomResizedCrop', size=224, backend='pillow'),
|
||||
dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='ToTensor', keys=['gt_label']),
|
||||
dict(type='Collect', keys=['img', 'gt_label'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', size=256, backend='pillow'),
|
||||
dict(type='CenterCrop', crop_size=224),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='ToTensor', keys=['gt_label']),
|
||||
dict(type='Collect', keys=['img', 'gt_label'])
|
||||
]
|
||||
data = dict(
|
||||
samples_per_gpu=64,
|
||||
workers_per_gpu=2,
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='data/imagenet/train',
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
data_prefix='data/imagenet/val',
|
||||
ann_file='data/imagenet/meta/val.txt',
|
||||
pipeline=test_pipeline),
|
||||
test=dict(
|
||||
# replace `data/imagenet/val` with `data/imagenet/test` for standard test
|
||||
type=dataset_type,
|
||||
data_prefix='data/imagenet/val',
|
||||
ann_file='data/imagenet/meta/val.txt',
|
||||
pipeline=test_pipeline))
|
||||
evaluation = dict(interval=1, metric='accuracy')
|
|
@ -1,6 +1,6 @@
|
|||
_base_ = [
|
||||
'../_base_/models/mobilenet_v2_1x.py',
|
||||
'../_base_/datasets/imagenet_bs32.py',
|
||||
'../_base_/datasets/imagenet_bs32_pil_resize.py',
|
||||
'../_base_/schedules/imagenet_bs256_epochstep.py',
|
||||
'../_base_/default_runtime.py'
|
||||
]
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
_base_ = [
|
||||
'../_base_/models/seresnet101.py', '../_base_/datasets/imagenet_bs32.py',
|
||||
'../_base_/models/seresnet101.py',
|
||||
'../_base_/datasets/imagenet_bs32_pil_resize.py',
|
||||
'../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
|
||||
]
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
_base_ = [
|
||||
'../_base_/models/seresnet50.py', '../_base_/datasets/imagenet_bs32.py',
|
||||
'../_base_/models/seresnet50.py',
|
||||
'../_base_/datasets/imagenet_bs32_pil_resize.py',
|
||||
'../_base_/schedules/imagenet_bs256_140e.py',
|
||||
'../_base_/default_runtime.py'
|
||||
]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
_base_ = [
|
||||
'../_base_/models/seresnext101_32x4d.py',
|
||||
'../_base_/datasets/imagenet_bs32.py',
|
||||
'../_base_/datasets/imagenet_bs32_pil_resize.py',
|
||||
'../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
|
||||
]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
_base_ = [
|
||||
'../_base_/models/seresnext50_32x4d.py',
|
||||
'../_base_/datasets/imagenet_bs32.py',
|
||||
'../_base_/datasets/imagenet_bs32_pil_resize.py',
|
||||
'../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
|
||||
]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
_base_ = [
|
||||
'../_base_/models/shufflenet_v1_1x.py',
|
||||
'../_base_/datasets/imagenet_bs64.py',
|
||||
'../_base_/datasets/imagenet_bs64_pil_resize.py',
|
||||
'../_base_/schedules/imagenet_bs1024_lineardecay.py',
|
||||
'../_base_/default_runtime.py'
|
||||
]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
_base_ = [
|
||||
'../_base_/models/shufflenet_v2_1x.py',
|
||||
'../_base_/datasets/imagenet_bs64.py',
|
||||
'../_base_/datasets/imagenet_bs64_pil_resize.py',
|
||||
'../_base_/schedules/imagenet_bs2048_lineardecay.py',
|
||||
'../_base_/default_runtime.py'
|
||||
]
|
||||
|
|
|
@ -140,13 +140,16 @@ class RandomResizedCrop(object):
|
|||
interpolation (str): Interpolation method, accepted values are
|
||||
'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Default:
|
||||
'bilinear'.
|
||||
backend (str): The image resize backend type, accepted values are
|
||||
`cv2` and `pillow`. Default: `cv2`.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
size,
|
||||
scale=(0.08, 1.0),
|
||||
ratio=(3. / 4., 4. / 3.),
|
||||
interpolation='bilinear'):
|
||||
interpolation='bilinear',
|
||||
backend='cv2'):
|
||||
if isinstance(size, (tuple, list)):
|
||||
self.size = size
|
||||
else:
|
||||
|
@ -154,10 +157,14 @@ class RandomResizedCrop(object):
|
|||
if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
|
||||
raise ValueError("range should be of kind (min, max). "
|
||||
f"But received {scale}")
|
||||
if backend not in ['cv2', 'pillow']:
|
||||
raise ValueError(f'backend: {backend} is not supported for resize.'
|
||||
'Supported backends are "cv2", "pillow"')
|
||||
|
||||
self.interpolation = interpolation
|
||||
self.scale = scale
|
||||
self.ratio = ratio
|
||||
self.backend = backend
|
||||
|
||||
@staticmethod
|
||||
def get_params(img, scale, ratio):
|
||||
|
@ -225,7 +232,10 @@ class RandomResizedCrop(object):
|
|||
xmin + target_height - 1
|
||||
]))
|
||||
results[key] = mmcv.imresize(
|
||||
img, tuple(self.size[::-1]), interpolation=self.interpolation)
|
||||
img,
|
||||
tuple(self.size[::-1]),
|
||||
interpolation=self.interpolation,
|
||||
backend=self.backend)
|
||||
return results
|
||||
|
||||
def __repr__(self):
|
||||
|
@ -331,9 +341,11 @@ class Resize(object):
|
|||
interpolation (str): Interpolation method, accepted values are
|
||||
"nearest", "bilinear", "bicubic", "area", "lanczos".
|
||||
More details can be found in `mmcv.image.geometric`.
|
||||
backend (str): The image resize backend type, accepted values are
|
||||
`cv2` and `pillow`. Default: `cv2`.
|
||||
"""
|
||||
|
||||
def __init__(self, size, interpolation='bilinear'):
|
||||
def __init__(self, size, interpolation='bilinear', backend='cv2'):
|
||||
assert isinstance(size, int) or (isinstance(size, tuple)
|
||||
and len(size) == 2)
|
||||
if isinstance(size, int):
|
||||
|
@ -341,11 +353,15 @@ class Resize(object):
|
|||
assert size[0] > 0 and size[1] > 0
|
||||
assert interpolation in ("nearest", "bilinear", "bicubic", "area",
|
||||
"lanczos")
|
||||
if backend not in ['cv2', 'pillow']:
|
||||
raise ValueError(f'backend: {backend} is not supported for resize.'
|
||||
'Supported backends are "cv2", "pillow"')
|
||||
|
||||
self.height = size[0]
|
||||
self.width = size[1]
|
||||
self.size = size
|
||||
self.interpolation = interpolation
|
||||
self.backend = backend
|
||||
|
||||
def _resize_img(self, results):
|
||||
for key in results.get('img_fields', ['img']):
|
||||
|
@ -353,7 +369,8 @@ class Resize(object):
|
|||
results[key],
|
||||
size=(self.width, self.height),
|
||||
interpolation=self.interpolation,
|
||||
return_scale=False)
|
||||
return_scale=False,
|
||||
backend=self.backend)
|
||||
results[key] = img
|
||||
results['img_shape'] = img.shape
|
||||
|
||||
|
|
|
@ -64,6 +64,7 @@ def test_resize():
|
|||
results['img2'] = copy.deepcopy(original_img)
|
||||
results['img_shape'] = original_img.shape
|
||||
results['ori_shape'] = original_img.shape
|
||||
results['img_fields'] = ['img', 'img2']
|
||||
return results
|
||||
|
||||
# test resize when size is int
|
||||
|
@ -101,6 +102,26 @@ def test_resize():
|
|||
assert np.equal(results['img'], results['img2']).all()
|
||||
assert results['img_shape'] == (img_height * 2, img_width * 2, 3)
|
||||
|
||||
# test resize with different backends
|
||||
transform_cv2 = dict(
|
||||
type='Resize',
|
||||
size=(224, 256),
|
||||
interpolation='bilinear',
|
||||
backend='cv2')
|
||||
transform_pil = dict(
|
||||
type='Resize',
|
||||
size=(224, 256),
|
||||
interpolation='bilinear',
|
||||
backend='pillow')
|
||||
resize_module_cv2 = build_from_cfg(transform_cv2, PIPELINES)
|
||||
resize_module_pil = build_from_cfg(transform_pil, PIPELINES)
|
||||
results = reset_results(results, original_img)
|
||||
results['img_fields'] = ['img']
|
||||
results_cv2 = resize_module_cv2(results)
|
||||
results['img_fields'] = ['img2']
|
||||
results_pil = resize_module_pil(results)
|
||||
assert np.allclose(results_cv2['img'], results_pil['img2'], atol=45)
|
||||
|
||||
# compare results with torchvision
|
||||
transform = dict(type='Resize', size=(224, 224), interpolation='area')
|
||||
resize_module = build_from_cfg(transform, PIPELINES)
|
||||
|
|
Loading…
Reference in New Issue