Added new features to `impad` (#377)

* update impad * fix docstring * add shape for impad * fix unit test * remove old version & fix doc * fix linting * fix doc Co-authored-by: lixiaojie <lixiaojie@sensetime.com>
2020-07-02 19:34:46 +08:00 · 2020-07-02 19:34:46 +08:00 · 82211b407e
parent cab4031454
commit 82211b407e
3 changed files with 183 additions and 23 deletions
--- a/docs/image.md
+++ b/docs/image.md
@ -137,10 +137,17 @@ specific size with given values.
 img = mmcv.imread('tests/data/color.jpg')

 # pad the image to (1000, 1200) with all zeros
-img_ = mmcv.impad(img, (1000, 1200), pad_val=0)
+img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0)

 # pad the image to (1000, 1200) with different values for three channels.
-img_ = mmcv.impad(img, (1000, 1200), pad_val=[100, 50, 200])
+img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200))
+
+# pad the image on left, top, right, bottom borders with all zeros
+img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0)
+
+# pad the image on left, top, right, bottom borders with different values
+# for three channels.
+img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200))

 # pad an image so that each edge is a multiple of some value.
 img_ = mmcv.impad_to_multiple(img, 32)
--- a/mmcv/image/geometric.py
+++ b/mmcv/image/geometric.py
@ -1,4 +1,6 @@
 # Copyright (c) Open-MMLab. All rights reserved.
+import numbers
+
 import cv2
 import numpy as np

@ -311,29 +313,85 @@ def imcrop(img, bboxes, scale=1.0, pad_fill=None):
        return patches


-def impad(img, shape, pad_val=0):
-    """Pad an image to a certain shape.
+def impad(img,
+          *,
+          shape=None,
+          padding=None,
+          pad_val=0,
+          padding_mode='constant'):
+    """Pad the given image to a certain shape or pad on all sides with
+    specified padding mode and padding value.

    Args:
        img (ndarray): Image to be padded.
-        shape (tuple[int]): Expected padding shape (h, w).
+        shape (tuple[int]): Expected padding shape (h, w). Default: None.
+        padding (int or tuple[int]): Padding on each border. If a single int is
+            provided this is used to pad all borders. If tuple of length 2 is
+            provided this is the padding on left/right and top/bottom
+            respectively. If a tuple of length 4 is provided this is the
+            padding for the left, top, right and bottom borders respectively.
+            Default: None. Note that `shape` and `padding` can not be both
+            set.
        pad_val (Number | Sequence[Number]): Values to be filled in padding
-            areas. Default: 0.
+            areas when padding_mode is 'constant'. Default: 0.
+        padding_mode (str): Type of padding. Should be: constant, edge,
+            reflect or symmetric. Default: constant.
+            - constant: pads with a constant value, this value is specified
+                with pad_val.
+            - edge: pads with the last value at the edge of the image.
+            - reflect: pads with reflection of image without repeating the
+                last value on the edge. For example, padding [1, 2, 3, 4]
+                with 2 elements on both sides in reflect mode will result
+                in [3, 2, 1, 2, 3, 4, 3, 2].
+            - symmetric: pads with reflection of image repeating the last
+                value on the edge. For example, padding [1, 2, 3, 4] with
+                2 elements on both sides in symmetric mode will result in
+                [2, 1, 1, 2, 3, 4, 4, 3]

    Returns:
        ndarray: The padded image.
    """
-    if not isinstance(pad_val, (int, float)):
+
+    assert (shape is not None) ^ (padding is not None)
+    if shape is not None:
+        padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0])
+
+    # check pad_val
+    if isinstance(pad_val, tuple):
        assert len(pad_val) == img.shape[-1]
-    if len(shape) < len(img.shape):
-        shape = shape + (img.shape[-1], )
-    assert len(shape) == len(img.shape)
-    for s, img_s in zip(shape, img.shape):
-        assert s >= img_s
-    pad = np.empty(shape, dtype=img.dtype)
-    pad[...] = pad_val
-    pad[:img.shape[0], :img.shape[1], ...] = img
-    return pad
+    elif not isinstance(pad_val, numbers.Number):
+        raise TypeError('pad_val must be a int or a tuple. '
+                        f'But received {type(pad_val)}')
+
+    # check padding
+    if isinstance(padding, tuple) and len(padding) in [2, 4]:
+        if len(padding) == 2:
+            padding = (padding[0], padding[1], padding[0], padding[1])
+    elif isinstance(padding, numbers.Number):
+        padding = (padding, padding, padding, padding)
+    else:
+        raise ValueError("Padding must be a int or a 2, or 4 element tuple."
+                         f"But received {padding}")
+
+    # check padding mode
+    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
+
+    border_type = {
+        'constant': cv2.BORDER_CONSTANT,
+        'edge': cv2.BORDER_REPLICATE,
+        'reflect': cv2.BORDER_REFLECT_101,
+        'symmetric': cv2.BORDER_REFLECT
+    }
+    img = cv2.copyMakeBorder(
+        img,
+        padding[1],
+        padding[3],
+        padding[0],
+        padding[2],
+        border_type[padding_mode],
+        value=pad_val)
+
+    return img


 def impad_to_multiple(img, divisor, pad_val=0):
@ -349,4 +407,4 @@ def impad_to_multiple(img, divisor, pad_val=0):
    """
    pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor
    pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor
-    return impad(img, (pad_h, pad_w), pad_val)
+    return impad(img, shape=(pad_h, pad_w))
--- a/tests/test_image/test_geometric.py
+++ b/tests/test_image/test_geometric.py
@ -223,7 +223,7 @@ class TestGeometric:
    def test_impad(self):
        # grayscale image
        img = np.random.rand(10, 10).astype(np.float32)
-        padded_img = mmcv.impad(img, (15, 12), 0)
+        padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0)
        assert_array_equal(img, padded_img[:10, :10])
        assert_array_equal(
            np.zeros((5, 12), dtype='float32'), padded_img[10:, :])
@ -232,15 +232,17 @@ class TestGeometric:

        # RGB image
        img = np.random.rand(10, 10, 3).astype(np.float32)
-        padded_img = mmcv.impad(img, (15, 12), 0)
+        padded_img = mmcv.impad(img, padding=(0, 0, 2, 5), pad_val=0)
        assert_array_equal(img, padded_img[:10, :10, :])
        assert_array_equal(
            np.zeros((5, 12, 3), dtype='float32'), padded_img[10:, :, :])
        assert_array_equal(
            np.zeros((15, 2, 3), dtype='float32'), padded_img[:, 10:, :])

+        # RGB image with different values for three channels.
        img = np.random.randint(256, size=(10, 10, 3)).astype('uint8')
-        padded_img = mmcv.impad(img, (15, 12, 3), [100, 110, 120])
+        padded_img = mmcv.impad(
+            img, padding=(0, 0, 2, 5), pad_val=(100, 110, 120))
        assert_array_equal(img, padded_img[:10, :10, :])
        assert_array_equal(
            np.array([100, 110, 120], dtype='uint8') * np.ones(
@ -249,12 +251,105 @@ class TestGeometric:
            np.array([100, 110, 120], dtype='uint8') * np.ones(
                (15, 2, 3), dtype='uint8'), padded_img[:, 10:, :])

+        # Pad the grayscale image to shape (15, 12)
+        img = np.random.rand(10, 10).astype(np.float32)
+        padded_img = mmcv.impad(img, shape=(15, 12))
+        assert_array_equal(img, padded_img[:10, :10])
+        assert_array_equal(
+            np.zeros((5, 12), dtype='float32'), padded_img[10:, :])
+        assert_array_equal(
+            np.zeros((15, 2), dtype='float32'), padded_img[:, 10:])
+
+        # Pad the RGB image to shape (15, 12)
+        img = np.random.rand(10, 10, 3).astype(np.float32)
+        padded_img = mmcv.impad(img, shape=(15, 12))
+        assert_array_equal(img, padded_img[:10, :10, :])
+        assert_array_equal(
+            np.zeros((5, 12, 3), dtype='float32'), padded_img[10:, :, :])
+        assert_array_equal(
+            np.zeros((15, 2, 3), dtype='float32'), padded_img[:, 10:, :])
+
+        # Pad the RGB image to shape (15, 12) with different values for
+        # three channels.
+        img = np.random.randint(256, size=(10, 10, 3)).astype('uint8')
+        padded_img = mmcv.impad(img, shape=(15, 12), pad_val=(100, 110, 120))
+        assert_array_equal(img, padded_img[:10, :10, :])
+        assert_array_equal(
+            np.array([100, 110, 120], dtype='uint8') * np.ones(
+                (5, 12, 3), dtype='uint8'), padded_img[10:, :, :])
+        assert_array_equal(
+            np.array([100, 110, 120], dtype='uint8') * np.ones(
+                (15, 2, 3), dtype='uint8'), padded_img[:, 10:, :])
+
+        # RGB image with padding=[5, 2]
+        img = np.random.rand(10, 10, 3).astype(np.float32)
+        padded_img = mmcv.impad(img, padding=(5, 2), pad_val=0)
+
+        assert padded_img.shape == (14, 20, 3)
+        assert_array_equal(img, padded_img[2:12, 5:15, :])
+        assert_array_equal(
+            np.zeros((2, 5, 3), dtype='float32'), padded_img[:2, :5, :])
+        assert_array_equal(
+            np.zeros((2, 5, 3), dtype='float32'), padded_img[12:, :5, :])
+        assert_array_equal(
+            np.zeros((2, 5, 3), dtype='float32'), padded_img[:2, 15:, :])
+        assert_array_equal(
+            np.zeros((2, 5, 3), dtype='float32'), padded_img[12:, 15:, :])
+
+        # RGB image with type(pad_val) = tuple
+        pad_val = (0, 1, 2)
+        img = np.random.rand(10, 10, 3).astype(np.float32)
+        padded_img = mmcv.impad(img, padding=(0, 0, 5, 2), pad_val=pad_val)
+
+        assert padded_img.shape == (12, 15, 3)
+        assert_array_equal(img, padded_img[:10, :10, :])
+        assert_array_equal(pad_val[0] * np.ones((2, 15, 1), dtype='float32'),
+                           padded_img[10:, :, 0:1])
+        assert_array_equal(pad_val[1] * np.ones((2, 15, 1), dtype='float32'),
+                           padded_img[10:, :, 1:2])
+        assert_array_equal(pad_val[2] * np.ones((2, 15, 1), dtype='float32'),
+                           padded_img[10:, :, 2:3])
+
+        assert_array_equal(pad_val[0] * np.ones((12, 5, 1), dtype='float32'),
+                           padded_img[:, 10:, 0:1])
+        assert_array_equal(pad_val[1] * np.ones((12, 5, 1), dtype='float32'),
+                           padded_img[:, 10:, 1:2])
+        assert_array_equal(pad_val[2] * np.ones((12, 5, 1), dtype='float32'),
+                           padded_img[:, 10:, 2:3])
+
+        # test different padding mode with channel number = 3
+        for mode in ['constant', 'edge', 'reflect', 'symmetric']:
+            img = np.random.rand(10, 10, 3).astype(np.float32)
+            padded_img = mmcv.impad(
+                img, padding=(0, 0, 5, 2), pad_val=pad_val, padding_mode=mode)
+            assert padded_img.shape == (12, 15, 3)
+
+        # test different padding mode with channel number = 1
+        for mode in ['constant', 'edge', 'reflect', 'symmetric']:
+            img = np.random.rand(10, 10).astype(np.float32)
+            padded_img = mmcv.impad(
+                img, padding=(0, 0, 5, 2), pad_val=0, padding_mode=mode)
+            assert padded_img.shape == (12, 15)
+
+        # Padding must be a int or a 2, or 4 element tuple.
+        with pytest.raises(ValueError):
+            mmcv.impad(img, padding=(1, 1, 1))
+
+        # pad_val must be a int or a tuple
+        with pytest.raises(TypeError):
+            mmcv.impad(img, padding=(1, 1, 1, 1), pad_val='wrong')
+
+        # When pad_val is a tuple,
+        # len(pad_val) should be equal to img.shape[-1]
+        img = np.random.rand(10, 10, 3).astype(np.float32)
        with pytest.raises(AssertionError):
-            mmcv.impad(img, (15, ), 0)
+            mmcv.impad(img, padding=3, pad_val=(100, 200))
+
        with pytest.raises(AssertionError):
-            mmcv.impad(img, (5, 5), 0)
+            mmcv.impad(img, padding=2, pad_val=0, padding_mode='unknown')
+
        with pytest.raises(AssertionError):
-            mmcv.impad(img, (5, 5), [0, 1])
+            mmcv.impad(img, shape=(12, 15), padding=(0, 0, 5, 2))

    def test_impad_to_multiple(self):
        img = np.random.rand(11, 14, 3).astype(np.float32)