From 82211b407e5eb25a826ee783637769043eee1365 Mon Sep 17 00:00:00 2001 From: Xiaojie Li Date: Thu, 2 Jul 2020 19:34:46 +0800 Subject: [PATCH] Added new features to `impad` (#377) * update impad * fix docstring * add shape for impad * fix unit test * remove old version & fix doc * fix linting * fix doc Co-authored-by: lixiaojie --- docs/image.md | 11 ++- mmcv/image/geometric.py | 88 ++++++++++++++++++++---- tests/test_image/test_geometric.py | 107 +++++++++++++++++++++++++++-- 3 files changed, 183 insertions(+), 23 deletions(-) diff --git a/docs/image.md b/docs/image.md index 9d25a1f01..5e716d0be 100644 --- a/docs/image.md +++ b/docs/image.md @@ -137,10 +137,17 @@ specific size with given values. img = mmcv.imread('tests/data/color.jpg') # pad the image to (1000, 1200) with all zeros -img_ = mmcv.impad(img, (1000, 1200), pad_val=0) +img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0) # pad the image to (1000, 1200) with different values for three channels. -img_ = mmcv.impad(img, (1000, 1200), pad_val=[100, 50, 200]) +img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200)) + +# pad the image on left, top, right, bottom borders with all zeros +img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0) + +# pad the image on left, top, right, bottom borders with different values +# for three channels. +img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200)) # pad an image so that each edge is a multiple of some value. img_ = mmcv.impad_to_multiple(img, 32) diff --git a/mmcv/image/geometric.py b/mmcv/image/geometric.py index 6cb5110b3..2e3a6a3dd 100644 --- a/mmcv/image/geometric.py +++ b/mmcv/image/geometric.py @@ -1,4 +1,6 @@ # Copyright (c) Open-MMLab. All rights reserved. +import numbers + import cv2 import numpy as np @@ -311,29 +313,85 @@ def imcrop(img, bboxes, scale=1.0, pad_fill=None): return patches -def impad(img, shape, pad_val=0): - """Pad an image to a certain shape. 
+def impad(img, + *, + shape=None, + padding=None, + pad_val=0, + padding_mode='constant'): + """Pad the given image to a certain shape or pad on all sides with + specified padding mode and padding value. Args: img (ndarray): Image to be padded. - shape (tuple[int]): Expected padding shape (h, w). + shape (tuple[int]): Expected padding shape (h, w). Default: None. + padding (int or tuple[int]): Padding on each border. If a single int is + provided this is used to pad all borders. If a tuple of length 2 is + provided this is the padding on left/right and top/bottom + respectively. If a tuple of length 4 is provided this is the + padding for the left, top, right and bottom borders respectively. + Default: None. Note that exactly one of `shape` and `padding` + must be set. pad_val (Number | Sequence[Number]): Values to be filled in padding - areas. Default: 0. + areas when padding_mode is 'constant'. Default: 0. + padding_mode (str): Type of padding. Should be: constant, edge, + reflect or symmetric. Default: constant. + - constant: pads with a constant value, this value is specified + with pad_val. + - edge: pads with the last value at the edge of the image. + - reflect: pads with reflection of image without repeating the + last value on the edge. For example, padding [1, 2, 3, 4] + with 2 elements on both sides in reflect mode will result + in [3, 2, 1, 2, 3, 4, 3, 2]. + - symmetric: pads with reflection of image repeating the last + value on the edge. For example, padding [1, 2, 3, 4] with + 2 elements on both sides in symmetric mode will result in + [2, 1, 1, 2, 3, 4, 4, 3]. Returns: ndarray: The padded image. 
""" - if not isinstance(pad_val, (int, float)): + + assert (shape is not None) ^ (padding is not None) + if shape is not None: + padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) + + # check pad_val + if isinstance(pad_val, tuple): assert len(pad_val) == img.shape[-1] - if len(shape) < len(img.shape): - shape = shape + (img.shape[-1], ) - assert len(shape) == len(img.shape) - for s, img_s in zip(shape, img.shape): - assert s >= img_s - pad = np.empty(shape, dtype=img.dtype) - pad[...] = pad_val - pad[:img.shape[0], :img.shape[1], ...] = img - return pad + elif not isinstance(pad_val, numbers.Number): + raise TypeError('pad_val must be a int or a tuple. ' + f'But received {type(pad_val)}') + + # check padding + if isinstance(padding, tuple) and len(padding) in [2, 4]: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + elif isinstance(padding, numbers.Number): + padding = (padding, padding, padding, padding) + else: + raise ValueError("Padding must be a int or a 2, or 4 element tuple." 
+ f" But received {padding}") + + # check padding mode + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] + + border_type = { + 'constant': cv2.BORDER_CONSTANT, + 'edge': cv2.BORDER_REPLICATE, + 'reflect': cv2.BORDER_REFLECT_101, + 'symmetric': cv2.BORDER_REFLECT + } + img = cv2.copyMakeBorder( + img, + padding[1], + padding[3], + padding[0], + padding[2], + border_type[padding_mode], + value=pad_val) + + return img def impad_to_multiple(img, divisor, pad_val=0): @@ -349,4 +407,4 @@ def impad_to_multiple(img, divisor, pad_val=0): """ pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor - return impad(img, (pad_h, pad_w), pad_val) + return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) diff --git a/tests/test_image/test_geometric.py b/tests/test_image/test_geometric.py index e63412b31..8c77ae597 100644 --- a/tests/test_image/test_geometric.py +++ b/tests/test_image/test_geometric.py @@ -223,7 +223,7 @@ class TestGeometric: def test_impad(self): # grayscale image img = np.random.rand(10, 10).astype(np.float32) - padded_img = mmcv.impad(img, (15, 12), 0) + padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0) assert_array_equal(img, padded_img[:10, :10]) assert_array_equal( np.zeros((5, 12), dtype='float32'), padded_img[10:, :]) @@ -232,15 +232,17 @@ class TestGeometric: # RGB image img = np.random.rand(10, 10, 3).astype(np.float32) - padded_img = mmcv.impad(img, (15, 12), 0) + padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0) assert_array_equal(img, padded_img[:10, :10, :]) assert_array_equal( np.zeros((5, 12, 3), dtype='float32'), padded_img[10:, :, :]) assert_array_equal( np.zeros((15, 2, 3), dtype='float32'), padded_img[:, 10:, :]) + # RGB image with different values for three channels. 
img = np.random.randint(256, size=(10, 10, 3)).astype('uint8') - padded_img = mmcv.impad(img, (15, 12, 3), [100, 110, 120]) + padded_img = mmcv.impad( + img, padding=(0, 0, 2, 5), pad_val=(100, 110, 120)) assert_array_equal(img, padded_img[:10, :10, :]) assert_array_equal( np.array([100, 110, 120], dtype='uint8') * np.ones( @@ -249,12 +251,105 @@ class TestGeometric: np.array([100, 110, 120], dtype='uint8') * np.ones( (15, 2, 3), dtype='uint8'), padded_img[:, 10:, :]) + # Pad the grayscale image to shape (15, 12) + img = np.random.rand(10, 10).astype(np.float32) + padded_img = mmcv.impad(img, shape=(15, 12)) + assert_array_equal(img, padded_img[:10, :10]) + assert_array_equal( + np.zeros((5, 12), dtype='float32'), padded_img[10:, :]) + assert_array_equal( + np.zeros((15, 2), dtype='float32'), padded_img[:, 10:]) + + # Pad the RGB image to shape (15, 12) + img = np.random.rand(10, 10, 3).astype(np.float32) + padded_img = mmcv.impad(img, shape=(15, 12)) + assert_array_equal(img, padded_img[:10, :10, :]) + assert_array_equal( + np.zeros((5, 12, 3), dtype='float32'), padded_img[10:, :, :]) + assert_array_equal( + np.zeros((15, 2, 3), dtype='float32'), padded_img[:, 10:, :]) + + # Pad the RGB image to shape (15, 12) with different values for + # three channels. 
+ img = np.random.randint(256, size=(10, 10, 3)).astype('uint8') + padded_img = mmcv.impad(img, shape=(15, 12), pad_val=(100, 110, 120)) + assert_array_equal(img, padded_img[:10, :10, :]) + assert_array_equal( + np.array([100, 110, 120], dtype='uint8') * np.ones( + (5, 12, 3), dtype='uint8'), padded_img[10:, :, :]) + assert_array_equal( + np.array([100, 110, 120], dtype='uint8') * np.ones( + (15, 2, 3), dtype='uint8'), padded_img[:, 10:, :]) + + # RGB image with padding=[5, 2] + img = np.random.rand(10, 10, 3).astype(np.float32) + padded_img = mmcv.impad(img, padding=(5, 2), pad_val=0) + + assert padded_img.shape == (14, 20, 3) + assert_array_equal(img, padded_img[2:12, 5:15, :]) + assert_array_equal( + np.zeros((2, 5, 3), dtype='float32'), padded_img[:2, :5, :]) + assert_array_equal( + np.zeros((2, 5, 3), dtype='float32'), padded_img[12:, :5, :]) + assert_array_equal( + np.zeros((2, 5, 3), dtype='float32'), padded_img[:2, 15:, :]) + assert_array_equal( + np.zeros((2, 5, 3), dtype='float32'), padded_img[12:, 15:, :]) + + # RGB image with type(pad_val) = tuple + pad_val = (0, 1, 2) + img = np.random.rand(10, 10, 3).astype(np.float32) + padded_img = mmcv.impad(img, padding=(0, 0, 5, 2), pad_val=pad_val) + + assert padded_img.shape == (12, 15, 3) + assert_array_equal(img, padded_img[:10, :10, :]) + assert_array_equal(pad_val[0] * np.ones((2, 15, 1), dtype='float32'), + padded_img[10:, :, 0:1]) + assert_array_equal(pad_val[1] * np.ones((2, 15, 1), dtype='float32'), + padded_img[10:, :, 1:2]) + assert_array_equal(pad_val[2] * np.ones((2, 15, 1), dtype='float32'), + padded_img[10:, :, 2:3]) + + assert_array_equal(pad_val[0] * np.ones((12, 5, 1), dtype='float32'), + padded_img[:, 10:, 0:1]) + assert_array_equal(pad_val[1] * np.ones((12, 5, 1), dtype='float32'), + padded_img[:, 10:, 1:2]) + assert_array_equal(pad_val[2] * np.ones((12, 5, 1), dtype='float32'), + padded_img[:, 10:, 2:3]) + + # test different padding mode with channel number = 3 + for mode in ['constant', 
'edge', 'reflect', 'symmetric']: + img = np.random.rand(10, 10, 3).astype(np.float32) + padded_img = mmcv.impad( + img, padding=(0, 0, 5, 2), pad_val=pad_val, padding_mode=mode) + assert padded_img.shape == (12, 15, 3) + + # test different padding mode with channel number = 1 + for mode in ['constant', 'edge', 'reflect', 'symmetric']: + img = np.random.rand(10, 10).astype(np.float32) + padded_img = mmcv.impad( + img, padding=(0, 0, 5, 2), pad_val=0, padding_mode=mode) + assert padded_img.shape == (12, 15) + + # Padding must be a int or a 2, or 4 element tuple. + with pytest.raises(ValueError): + mmcv.impad(img, padding=(1, 1, 1)) + + # pad_val must be a int or a tuple + with pytest.raises(TypeError): + mmcv.impad(img, padding=(1, 1, 1, 1), pad_val='wrong') + + # When pad_val is a tuple, + # len(pad_val) should be equal to img.shape[-1] + img = np.random.rand(10, 10, 3).astype(np.float32) with pytest.raises(AssertionError): - mmcv.impad(img, (15, ), 0) + mmcv.impad(img, padding=3, pad_val=(100, 200)) + with pytest.raises(AssertionError): - mmcv.impad(img, (5, 5), 0) + mmcv.impad(img, padding=2, pad_val=0, padding_mode='unknown') + with pytest.raises(AssertionError): - mmcv.impad(img, (5, 5), [0, 1]) + mmcv.impad(img, shape=(12, 15), padding=(0, 0, 5, 2)) def test_impad_to_multiple(self): img = np.random.rand(11, 14, 3).astype(np.float32)