mirror of https://github.com/open-mmlab/mmocr.git
[Transform] RandomFlip
parent
d5a2d20574
commit
f03ed3ce11
|
@ -8,10 +8,10 @@ from .ocr_seg_targets import OCRSegTargets
|
|||
from .ocr_transforms import (FancyPCA, NormalizeOCR, OnlineCropOCR,
|
||||
OpencvToPil, PilToOpencv, RandomPaddingOCR,
|
||||
RandomRotateImageBox, ResizeOCR, ToTensorOCR)
|
||||
from .processing import (PadToWidth, PyramidRescale, RandomCrop, RandomRotate,
|
||||
RescaleToHeight, Resize, ShortScaleAspectJitter,
|
||||
SourceImagePad, TextDetRandomCrop,
|
||||
TextDetRandomCropFlip)
|
||||
from .processing import (PadToWidth, PyramidRescale, RandomCrop, RandomFlip,
|
||||
RandomRotate, RescaleToHeight, Resize,
|
||||
ShortScaleAspectJitter, SourceImagePad,
|
||||
TextDetRandomCrop, TextDetRandomCropFlip)
|
||||
from .test_time_aug import MultiRotateAugOCR
|
||||
from .textdet_targets import (DBNetTargets, FCENetTargets, PANetTargets,
|
||||
TextSnakeTargets)
|
||||
|
@ -28,5 +28,5 @@ __all__ = [
|
|||
'ToTensorNER', 'ResizeNoImg', 'PyramidRescale', 'TorchVisionWrapper',
|
||||
'Resize', 'RandomCrop', 'TextDetRandomCrop', 'RandomCrop',
|
||||
'PackTextDetInputs', 'PackTextRecogInputs', 'RescaleToHeight',
|
||||
'PadToWidth', 'ShortScaleAspectJitter'
|
||||
'PadToWidth', 'ShortScaleAspectJitter', 'RandomFlip'
|
||||
]
|
||||
|
|
|
@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
|
|||
import cv2
|
||||
import mmcv
|
||||
import numpy as np
|
||||
from mmcv.transforms import RandomFlip as MMCV_RandomFlip
|
||||
from mmcv.transforms import Resize as MMCV_Resize
|
||||
from mmcv.transforms.base import BaseTransform
|
||||
from mmcv.transforms.utils import avoid_cache_randomness, cache_randomness
|
||||
|
@ -1359,6 +1360,99 @@ class SourceImagePad(BaseTransform):
|
|||
return repr_str
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class RandomFlip(MMCV_RandomFlip):
    """Flip the image, bounding boxes and polygons.

    Extends the MMCV ``RandomFlip`` transform so that ``gt_polygons`` is
    flipped together with whatever the parent transform already flips.

    There are 3 flip modes:

     - ``prob`` is float, ``direction`` is string: the image will be
         ``direction``ly flipped with probability of ``prob`` .
         E.g., ``prob=0.5``, ``direction='horizontal'``,
         then image will be horizontally flipped with probability of 0.5.
     - ``prob`` is float, ``direction`` is list of string: the image will
         be ``direction[i]``ly flipped with probability of
         ``prob/len(direction)``.
         E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``,
         then image will be horizontally flipped with probability of 0.25,
         vertically with probability of 0.25.
     - ``prob`` is list of float, ``direction`` is list of string:
         given ``len(prob) == len(direction)``, the image will
         be ``direction[i]``ly flipped with probability of ``prob[i]``.
         E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal',
         'vertical']``, then image will be horizontally flipped with
         probability of 0.3, vertically with probability of 0.5.

    Required Keys:
        - img
        - gt_bboxes (optional)
        - gt_polygons (optional)

    Modified Keys:
        - img
        - gt_bboxes (optional)
        - gt_polygons (optional)

    Added Keys:
        - flip
        - flip_direction

    Args:
        prob (float | list[float], optional): The flipping probability.
            Defaults to None.
        direction (str | list[str]): The flipping direction. Options are
            'horizontal', 'vertical' and 'diagonal'. If input is a list,
            the length must equal ``prob``. Each element in ``prob``
            indicates the flip probability of corresponding direction.
            Defaults to 'horizontal'.
    """

    def flip_polygons(self, polygons: Sequence[np.ndarray],
                      img_shape: Tuple[int, int],
                      direction: str) -> Sequence[np.ndarray]:
        """Flip polygons horizontally, vertically or diagonally.

        Each polygon is treated as a flat array of interleaved coordinates
        ``[x0, y0, x1, y1, ...]``: even indices are mirrored against the
        image width and odd indices against the image height. The input
        arrays are not modified; copies are returned.

        Args:
            polygons (list[numpy.ndarray]): Polygons to flip.
            img_shape (tuple[int]): Image shape (height, width).
            direction (str): Flip direction. Options are 'horizontal',
                'vertical' and 'diagonal'.

        Returns:
            list[numpy.ndarray]: Flipped polygons.

        Raises:
            ValueError: If ``direction`` is not one of the three options.
        """
        h, w = img_shape
        if direction not in ('horizontal', 'vertical', 'diagonal'):
            # Adjacent literals are used instead of a backslash continuation
            # so that source indentation does not leak into the message.
            raise ValueError(
                "Flipping direction must be 'horizontal', 'vertical', "
                f"or 'diagonal', but got '{direction}'")

        # A diagonal flip is a horizontal flip combined with a vertical one.
        flip_x = direction in ('horizontal', 'diagonal')
        flip_y = direction in ('vertical', 'diagonal')

        flipped_polygons = []
        for polygon in polygons:
            flipped_polygon = polygon.copy()
            if flip_x:
                flipped_polygon[0::2] = w - polygon[0::2]
            if flip_y:
                flipped_polygon[1::2] = h - polygon[1::2]
            flipped_polygons.append(flipped_polygon)
        return flipped_polygons

    def _flip(self, results: dict) -> None:
        """Flip images, bounding boxes and polygons.

        Delegates to the parent ``_flip`` first and then flips
        ``results['gt_polygons']`` in place of the original list, using the
        height and width of ``results['img']``.

        Args:
            results (dict): Result dict; modified in place.
        """
        super()._flip(results)
        # flip polygons
        # NOTE(review): ``flip_direction`` is presumably written into
        # ``results`` by the parent transform before ``_flip`` is invoked -
        # confirm against the MMCV implementation.
        if results.get('gt_polygons', None) is not None:
            results['gt_polygons'] = self.flip_polygons(
                results['gt_polygons'], results['img'].shape[:2],
                results['flip_direction'])
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
|
||||
@avoid_cache_randomness
|
||||
class ShortScaleAspectJitter(BaseTransform):
|
||||
|
|
|
@ -7,7 +7,8 @@ import numpy as np
|
|||
from mmcv.transforms import Pad, RandomResize
|
||||
|
||||
from mmocr.datasets.pipelines import (PadToWidth, PyramidRescale, RandomCrop,
|
||||
RandomRotate, RescaleToHeight, Resize,
|
||||
RandomFlip, RandomRotate,
|
||||
RescaleToHeight, Resize,
|
||||
ShortScaleAspectJitter, SourceImagePad,
|
||||
TextDetRandomCrop, TextDetRandomCropFlip)
|
||||
from mmocr.utils import bbox2poly, poly2shapely
|
||||
|
@ -633,6 +634,50 @@ class TestSourceImagePad(unittest.TestCase):
|
|||
))
|
||||
|
||||
|
||||
class TestRandomFlip(unittest.TestCase):
    """Unit tests for the polygon-aware ``RandomFlip`` transform."""

    def setUp(self):
        """Create a 30x40 random image carrying one rectangular polygon."""
        image = np.random.random((30, 40, 3))
        rectangle = np.array([10., 5., 20., 5., 20., 10., 10., 10.])
        self.data_info = dict(
            img_shape=(30, 40), img=image, gt_polygons=[rectangle])

    def test_flip_polygons(self):
        """``flip_polygons`` mirrors coordinates for every direction."""
        transform = RandomFlip(prob=1.0, direction='horizontal')
        polygons = self.data_info['gt_polygons']
        shape = (30, 40)

        # (direction, expected flat coordinates of the flipped rectangle)
        cases = (
            ('horizontal', [30., 5., 20., 5., 20., 10., 30., 10.]),
            ('vertical', [10., 25., 20., 25., 20., 20., 10., 20.]),
            ('diagonal', [30., 25., 20., 25., 20., 20., 30., 20.]),
        )
        for direction, expected in cases:
            flipped = transform.flip_polygons(polygons, shape, direction)
            self.assertIsInstance(flipped, list)
            self.assertIsInstance(flipped[0], np.ndarray)
            self.assertTrue((flipped[0] == np.array(expected)).all())

        # An unknown direction must be rejected.
        with self.assertRaises(ValueError):
            transform.flip_polygons(polygons, shape, 'mmocr')

    def test_flip(self):
        """Calling the transform flips polygons and keeps the image shape."""
        transform = RandomFlip(prob=1.0, direction='horizontal')
        output = transform(self.data_info.copy())

        self.assertEqual(output['img'].shape, (30, 40, 3))
        self.assertEqual(output['img_shape'], (30, 40))

        expected = np.array([30., 5., 20., 5., 20., 10., 30., 10.])
        self.assertTrue((output['gt_polygons'][0] == expected).all())
|
||||
|
||||
|
||||
class TestShortScaleAspectJitter(unittest.TestCase):
|
||||
|
||||
@mock.patch('mmocr.datasets.pipelines.processing.np.random.random_sample')
|
||||
|
|
Loading…
Reference in New Issue