[Transform] RandomFlip

pull/1178/head
liukuikun 2022-06-13 08:18:00 +00:00 committed by gaotongxiao
parent d5a2d20574
commit f03ed3ce11
3 changed files with 145 additions and 6 deletions

View File

@ -8,10 +8,10 @@ from .ocr_seg_targets import OCRSegTargets
from .ocr_transforms import (FancyPCA, NormalizeOCR, OnlineCropOCR,
OpencvToPil, PilToOpencv, RandomPaddingOCR,
RandomRotateImageBox, ResizeOCR, ToTensorOCR)
from .processing import (PadToWidth, PyramidRescale, RandomCrop, RandomRotate,
RescaleToHeight, Resize, ShortScaleAspectJitter,
SourceImagePad, TextDetRandomCrop,
TextDetRandomCropFlip)
from .processing import (PadToWidth, PyramidRescale, RandomCrop, RandomFlip,
RandomRotate, RescaleToHeight, Resize,
ShortScaleAspectJitter, SourceImagePad,
TextDetRandomCrop, TextDetRandomCropFlip)
from .test_time_aug import MultiRotateAugOCR
from .textdet_targets import (DBNetTargets, FCENetTargets, PANetTargets,
TextSnakeTargets)
@ -28,5 +28,5 @@ __all__ = [
'ToTensorNER', 'ResizeNoImg', 'PyramidRescale', 'TorchVisionWrapper',
'Resize', 'RandomCrop', 'TextDetRandomCrop', 'RandomCrop',
'PackTextDetInputs', 'PackTextRecogInputs', 'RescaleToHeight',
'PadToWidth', 'ShortScaleAspectJitter'
'PadToWidth', 'ShortScaleAspectJitter', 'RandomFlip'
]

View File

@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
import cv2
import mmcv
import numpy as np
from mmcv.transforms import RandomFlip as MMCV_RandomFlip
from mmcv.transforms import Resize as MMCV_Resize
from mmcv.transforms.base import BaseTransform
from mmcv.transforms.utils import avoid_cache_randomness, cache_randomness
@ -1359,6 +1360,99 @@ class SourceImagePad(BaseTransform):
return repr_str
@TRANSFORMS.register_module()
class RandomFlip(MMCV_RandomFlip):
    """Flip the image & bbox polygon.

    There are 3 flip modes:

    - ``prob`` is float, ``direction`` is string: the image will be
      ``direction``ly flipped with probability of ``prob`` .
      E.g., ``prob=0.5``, ``direction='horizontal'``,
      then image will be horizontally flipped with probability of 0.5.
    - ``prob`` is float, ``direction`` is list of string: the image will
      be ``direction[i]``ly flipped with probability of
      ``prob/len(direction)``.
      E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``,
      then image will be horizontally flipped with probability of 0.25,
      vertically with probability of 0.25.
    - ``prob`` is list of float, ``direction`` is list of string:
      given ``len(prob) == len(direction)``, the image will
      be ``direction[i]``ly flipped with probability of ``prob[i]``.
      E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal',
      'vertical']``, then image will be horizontally flipped with
      probability of 0.3, vertically with probability of 0.5.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_polygons (optional)

    Modified Keys:

    - img
    - gt_bboxes (optional)
    - gt_polygons (optional)

    Added Keys:

    - flip
    - flip_direction

    Args:
        prob (float | list[float], optional): The flipping probability.
            Defaults to None.
        direction (str | list[str]): The flipping direction. The polygon
            flipping implemented here supports 'horizontal', 'vertical'
            and 'diagonal'. If input is a list, the length must equal
            ``prob``. Each element in ``prob`` indicates the flip
            probability of corresponding direction.
            Defaults to 'horizontal'.
    """

    def flip_polygons(self, polygons: Sequence[np.ndarray],
                      img_shape: Tuple[int, int],
                      direction: str) -> Sequence[np.ndarray]:
        """Flip polygons horizontally, vertically or diagonally.

        Each polygon is indexed as a flat coordinate array: even positions
        are mirrored against the image width and odd positions against the
        image height. The input arrays are copied, never modified in place.

        Args:
            polygons (list[numpy.ndarray]): Polygons to flip.
            img_shape (tuple[int]): Image shape (height, width).
            direction (str): Flip direction. Options are 'horizontal',
                'vertical' and 'diagonal'.

        Returns:
            list[numpy.ndarray]: Flipped polygons, in the input order.

        Raises:
            ValueError: If ``direction`` is not one of the supported
                options (raised even when ``polygons`` is empty).
        """
        h, w = img_shape
        # A diagonal flip is a horizontal flip combined with a vertical one,
        # so the three modes reduce to two independent per-axis switches.
        mirror_x = direction in ('horizontal', 'diagonal')
        mirror_y = direction in ('vertical', 'diagonal')
        if not (mirror_x or mirror_y):
            # Adjacent literals are used instead of a backslash continuation
            # so that source indentation can never leak into the message.
            raise ValueError(
                "Flipping direction must be 'horizontal', 'vertical', "
                f"or 'diagonal', but got '{direction}'")

        flipped_polygons = []
        for polygon in polygons:
            flipped_polygon = polygon.copy()
            if mirror_x:
                flipped_polygon[0::2] = w - polygon[0::2]
            if mirror_y:
                flipped_polygon[1::2] = h - polygon[1::2]
            flipped_polygons.append(flipped_polygon)
        return flipped_polygons

    def _flip(self, results: dict) -> None:
        """Flip images, bounding boxes and polygons.

        The parent implementation is invoked first; afterwards
        ``results['gt_polygons']`` (when present and not None) is replaced
        by its flipped counterpart.

        Args:
            results (dict): Result dict. ``flip_direction`` is read from it
                (presumably set by the parent transform before ``_flip``
                is called -- confirm against mmcv).
        """
        super()._flip(results)
        # flip polygons
        if results.get('gt_polygons', None) is not None:
            results['gt_polygons'] = self.flip_polygons(
                results['gt_polygons'], results['img'].shape[:2],
                results['flip_direction'])
@TRANSFORMS.register_module()
@avoid_cache_randomness
class ShortScaleAspectJitter(BaseTransform):

View File

@ -7,7 +7,8 @@ import numpy as np
from mmcv.transforms import Pad, RandomResize
from mmocr.datasets.pipelines import (PadToWidth, PyramidRescale, RandomCrop,
RandomRotate, RescaleToHeight, Resize,
RandomFlip, RandomRotate,
RescaleToHeight, Resize,
ShortScaleAspectJitter, SourceImagePad,
TextDetRandomCrop, TextDetRandomCropFlip)
from mmocr.utils import bbox2poly, poly2shapely
@ -633,6 +634,50 @@ class TestSourceImagePad(unittest.TestCase):
))
class TestRandomFlip(unittest.TestCase):
    """Unit tests for the polygon-aware ``RandomFlip`` transform."""

    def setUp(self):
        """Build a 30x40 random image with one axis-aligned quadrilateral."""
        quad = np.array([10., 5., 20., 5., 20., 10., 10., 10.])
        self.data_info = dict(
            img_shape=(30, 40),
            img=np.random.random((30, 40, 3)),
            gt_polygons=[quad])

    def test_flip_polygons(self):
        """``flip_polygons`` mirrors coordinates for each valid direction
        and rejects an unknown one."""
        transform = RandomFlip(prob=1.0, direction='horizontal')
        polygons = self.data_info['gt_polygons']
        # (direction, expected flat coordinates) for a 30x40 image
        cases = (
            ('horizontal', [30., 5., 20., 5., 20., 10., 30., 10.]),
            ('vertical', [10., 25., 20., 25., 20., 20., 10., 20.]),
            ('diagonal', [30., 25., 20., 25., 20., 20., 30., 20.]),
        )
        for direction, expected in cases:
            flipped = transform.flip_polygons(polygons, (30, 40), direction)
            self.assertIsInstance(flipped, list)
            self.assertIsInstance(flipped[0], np.ndarray)
            self.assertTrue((flipped[0] == np.array(expected)).all())

        with self.assertRaises(ValueError):
            transform.flip_polygons(polygons, (30, 40), 'mmocr')

    def test_flip(self):
        """Running the full transform keeps the image shape and flips the
        polygon horizontally."""
        transform = RandomFlip(prob=1.0, direction='horizontal')
        output = transform(self.data_info.copy())
        self.assertEqual(output['img'].shape, (30, 40, 3))
        self.assertEqual(output['img_shape'], (30, 40))
        expected = np.array([30., 5., 20., 5., 20., 10., 30., 10.])
        self.assertTrue((output['gt_polygons'][0] == expected).all())
class TestShortScaleAspectJitter(unittest.TestCase):
@mock.patch('mmocr.datasets.pipelines.processing.np.random.random_sample')