[Feature] Let ResizeOCR fully support mmcv.impad's pad_val parameters (#1437)

* fix #1436: Let ResizeOCR fully support mmcv.impad's pad_val parameters

* fix #1436: Let ResizeOCR fully support mmcv.impad's pad_val parameters
pull/1205/head
hsiehpinghan 2022-10-10 11:09:32 +08:00 committed by GitHub
parent 5fc920495a
commit a71889aeb4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 7 deletions

View File

@ -28,12 +28,28 @@ class ResizeOCR:
keep_aspect_ratio (bool): Keep image aspect ratio if True
during resizing; otherwise resize to the size height *
max_width.
img_pad_value (int): Scalar to fill padding area.
img_pad_value (Number | Sequence[Number]): Values to be filled in
padding areas when padding_mode is 'constant'. Default: 0.
width_downsample_ratio (float): Downsample ratio in horizontal
direction from input image to output feature.
backend (str | None): The image resize backend type. Options are `cv2`,
`pillow`, `None`. If backend is None, the global imread_backend
specified by ``mmcv.use_backend()`` will be used. Default: None.
backend (str | None): The image resize backend type. Options are
`cv2`, `pillow`, `None`. If backend is None, the global
imread_backend specified by ``mmcv.use_backend()`` will be used.
Default: None.
padding_mode (str): Type of padding. Should be: constant, edge,
reflect or symmetric. Default: constant.
- constant: pads with a constant value, this value is specified
with img_pad_value.
- edge: pads with the last value at the edge of the image.
- reflect: pads with reflection of image without repeating the last
value on the edge. For example, padding [1, 2, 3, 4] with 2
elements on both sides in reflect mode will result in
[3, 2, 1, 2, 3, 4, 3, 2].
- symmetric: pads with reflection of image repeating the last value
on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
both sides in symmetric mode will result in
[2, 1, 1, 2, 3, 4, 4, 3]
"""
def __init__(self,
@ -43,14 +59,14 @@ class ResizeOCR:
keep_aspect_ratio=True,
img_pad_value=0,
width_downsample_ratio=1.0 / 16,
backend=None):
backend=None,
padding_mode='constant'):
assert isinstance(height, (int, tuple))
assert utils.is_none_or_type(min_width, (int, tuple))
assert utils.is_none_or_type(max_width, (int, tuple))
if not keep_aspect_ratio:
assert max_width is not None, ('"max_width" must assigned '
'if "keep_aspect_ratio" is False')
assert isinstance(img_pad_value, int)
if isinstance(height, tuple):
assert isinstance(min_width, tuple)
assert isinstance(max_width, tuple)
@ -63,6 +79,7 @@ class ResizeOCR:
self.img_pad_value = img_pad_value
self.width_downsample_ratio = width_downsample_ratio
self.backend = backend
self.padding_mode = padding_mode
def __call__(self, results):
rank, _ = get_dist_info()
@ -105,7 +122,8 @@ class ResizeOCR:
img_resize = mmcv.impad(
img_resize,
shape=(dst_height, dst_max_width),
pad_val=self.img_pad_value)
pad_val=self.img_pad_value,
padding_mode=self.padding_mode)
pad_shape = img_resize.shape
else:
img_resize = mmcv.imresize(

View File

@ -31,6 +31,19 @@ def test_resize_ocr():
results = rci(results)
assert math.isclose(results['valid_ratio'], 1)
# test img_pad_value
rci = transforms.ResizeOCR(
32,
min_width=32,
max_width=160,
keep_aspect_ratio=True,
img_pad_value=(127, 127, 127))
results = {'img_shape': input_img.shape, 'img': input_img}
results = rci(results)
assert results['img'].shape == (32, 160, 3)
assert np.all(results['img'][:32, :128, :] == np.array([1]))
assert np.all(results['img'][:32, 128:, :] == np.array([127]))
def test_to_tensor():
input_img = np.ones((64, 256, 3), dtype=np.uint8)