From 0c8fa52b223c6b0c295af31c08615fcfd574a750 Mon Sep 17 00:00:00 2001 From: Tong Gao Date: Thu, 28 Apr 2022 22:52:11 +0800 Subject: [PATCH] [Fix] Fix ImgAug transform (#949) * [Fix] Fix bbox case in ImgAug * Finialize the augmentation and add tests --- mmocr/datasets/pipelines/dbnet_transforms.py | 61 +++++++++++++++++--- tests/test_dataset/test_dbnet_transforms.py | 40 ++++++++++--- 2 files changed, 83 insertions(+), 18 deletions(-) diff --git a/mmocr/datasets/pipelines/dbnet_transforms.py b/mmocr/datasets/pipelines/dbnet_transforms.py index 8494cdd6..b736d337 100644 --- a/mmocr/datasets/pipelines/dbnet_transforms.py +++ b/mmocr/datasets/pipelines/dbnet_transforms.py @@ -57,11 +57,14 @@ class ImgAug: followed by random rotation with angles in range [-10, 10], and resize with an independent scale in range [0.5, 3.0] for each side of images. + clip_invalid_polys (bool): Whether to clip invalid polygons after + transformation. False persists to the behavior in DBNet. """ - def __init__(self, args=None): + def __init__(self, args=None, clip_invalid_ploys=True): self.augmenter_args = args self.augmenter = AugmenterBuilder().build(self.augmenter_args) + self.clip_invalid_polys = clip_invalid_ploys def __call__(self, results): # img is bgr @@ -87,28 +90,68 @@ class ImgAug: # augment polygon mask for key in results['mask_fields']: - masks = self.may_augment_poly(aug, shape, results[key]) - if len(masks) > 0: + if self.clip_invalid_polys: + masks = self.may_augment_poly(aug, shape, results[key]) results[key] = PolygonMasks(masks, *target_shape[:2]) + else: + masks = self.may_augment_poly_legacy(aug, shape, results[key]) + if len(masks) > 0: + results[key] = PolygonMasks(masks, *target_shape[:2]) # augment bbox for key in results['bbox_fields']: - bboxes = self.may_augment_poly( - aug, shape, results[key], mask_flag=False) + bboxes = self.may_augment_bbox(aug, shape, results[key]) results[key] = np.zeros(0) if len(bboxes) > 0: results[key] = np.stack(bboxes) return results - def may_augment_poly(self, aug, img_shape, polys, mask_flag=True): + def may_augment_bbox(self, aug, ori_shape, bboxes): + imgaug_bboxes = [] + for bbox in bboxes: + x1, y1, x2, y2 = bbox + imgaug_bboxes.append( + imgaug.BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2)) + imgaug_bboxes = aug.augment_bounding_boxes([ + imgaug.BoundingBoxesOnImage(imgaug_bboxes, shape=ori_shape) + ])[0].clip_out_of_image() + + new_bboxes = [] + for box in imgaug_bboxes.bounding_boxes: + new_bboxes.append( + np.array([box.x1, box.y1, box.x2, box.y2], dtype=np.float32)) + + return new_bboxes + + def may_augment_poly(self, aug, img_shape, polys): + imgaug_polys = [] + for poly in polys: + poly = poly[0] + poly = poly.reshape(-1, 2) + imgaug_polys.append(imgaug.Polygon(poly)) + imgaug_polys = aug.augment_polygons( + [imgaug.PolygonsOnImage(imgaug_polys, + shape=img_shape)])[0].clip_out_of_image() + + new_polys = [] + for poly in imgaug_polys.polygons: + new_poly = [] + for point in poly: + new_poly.append(np.array(point, dtype=np.float32)) + new_poly = np.array(new_poly, dtype=np.float32).flatten() + new_polys.append([new_poly]) + + return new_polys + + def may_augment_poly_legacy(self, aug, img_shape, polys): key_points, poly_point_nums = [], [] for poly in polys: - if mask_flag: - poly = poly[0] + poly = poly[0] poly = poly.reshape(-1, 2) key_points.extend([imgaug.Keypoint(p[0], p[1]) for p in poly]) poly_point_nums.append(poly.shape[0]) + # Warning: we do not clip the out-of-boudnary polygons key_points = aug.augment_keypoints( [imgaug.KeypointsOnImage(keypoints=key_points, shape=img_shape)])[0].keypoints @@ -122,7 +165,7 @@ class ImgAug: new_poly.append([key_point.x, key_point.y]) start_idx += poly_point_num new_poly = np.array(new_poly).flatten() - new_polys.append([new_poly] if mask_flag else new_poly) + new_polys.append([new_poly]) return new_polys diff --git a/tests/test_dataset/test_dbnet_transforms.py b/tests/test_dataset/test_dbnet_transforms.py index 71c1e1c9..0b789a66 100644 --- a/tests/test_dataset/test_dbnet_transforms.py +++ b/tests/test_dataset/test_dbnet_transforms.py @@ -1,25 +1,47 @@ # Copyright (c) OpenMMLab. All rights reserved. +import imgaug import numpy as np +from shapely.geometry import Polygon import mmocr.datasets.pipelines.dbnet_transforms as transforms def test_imgaug(): - args = [['Fliplr', 0.5], - dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]] - imgaug = transforms.ImgAug(args) - img = np.random.rand(3, 100, 200) + args = [dict(cls='Affine', translate_px=dict(x=-10, y=-10))] + imgaug_transform = transforms.ImgAug(args, clip_invalid_ploys=False) + img = np.random.rand(100, 200, 3) poly = np.array([[[0, 0, 50, 0, 50, 50, 0, 50]], [[20, 20, 50, 20, 50, 50, 20, 50]]]) box = np.array([[0, 0, 50, 50], [20, 20, 50, 50]]) results = dict(img=img, masks=poly, bboxes=box) results['mask_fields'] = ['masks'] results['bbox_fields'] = ['bboxes'] - results = imgaug(results) - assert np.allclose(results['bboxes'][0], - results['masks'].masks[0][0][[0, 1, 4, 5]]) - assert np.allclose(results['bboxes'][1], - results['masks'].masks[1][0][[0, 1, 4, 5]]) + results = imgaug_transform(results) + for i in range(2): + mask = results['masks'].masks[i][0] + poly = imgaug.augmentables.polys.Polygon(mask.reshape(-1, 2)) + box = poly.to_bounding_box().clip_out_of_image(results['img_shape']) + assert box.coords_almost_equals(results['bboxes'][i].reshape(-1, 2)) + + args = [dict(cls='Affine', translate_px=dict(x=-10, y=-10))] + imgaug_transform = transforms.ImgAug(args, clip_invalid_ploys=True) + img = np.random.rand(100, 200, 3) + poly = np.array([[[0, 0, 50, 0, 50, 50, 0, 50]], + [[20, 20, 50, 20, 50, 50, 20, 50]]]) + box = np.array([[0, 0, 50, 50], [20, 20, 50, 50]]) + poly_target = np.array([[[0, 0, 40, 0, 40, 40, 0, 40]], + [[10, 10, 40, 10, 40, 40, 10, 40]]]) + box_target = np.array([[0, 0, 40, 40], [10, 10, 40, 40]]) + results = dict(img=img, masks=poly, bboxes=box) + results['mask_fields'] = ['masks'] + results['bbox_fields'] = ['bboxes'] + results = imgaug_transform(results) + assert np.allclose(results['bboxes'], box_target) + for i in range(2): + poly1 = Polygon(results['masks'].masks[i][0].reshape(-1, 2)) + poly2 = Polygon(poly_target[i].reshape(-1, 2)) + assert poly1.equals(poly2) + assert np.allclose(results['bboxes'][i], box_target[i]) def test_eastrandomcrop():