mirror of https://github.com/open-mmlab/mmocr.git
[Transform] ScaleAspectJitter
parent
da175b44a4
commit
dfe93dc7d2
|
@ -9,8 +9,9 @@ from .ocr_transforms import (FancyPCA, NormalizeOCR, OnlineCropOCR,
|
||||||
OpencvToPil, PilToOpencv, RandomPaddingOCR,
|
OpencvToPil, PilToOpencv, RandomPaddingOCR,
|
||||||
RandomRotateImageBox, ResizeOCR, ToTensorOCR)
|
RandomRotateImageBox, ResizeOCR, ToTensorOCR)
|
||||||
from .processing import (PadToWidth, PyramidRescale, RandomCrop, RandomRotate,
|
from .processing import (PadToWidth, PyramidRescale, RandomCrop, RandomRotate,
|
||||||
RescaleToHeight, Resize, SourceImagePad,
|
RescaleToHeight, Resize, ShortScaleAspectJitter,
|
||||||
TextDetRandomCrop, TextDetRandomCropFlip)
|
SourceImagePad, TextDetRandomCrop,
|
||||||
|
TextDetRandomCropFlip)
|
||||||
from .test_time_aug import MultiRotateAugOCR
|
from .test_time_aug import MultiRotateAugOCR
|
||||||
from .textdet_targets import (DBNetTargets, FCENetTargets, PANetTargets,
|
from .textdet_targets import (DBNetTargets, FCENetTargets, PANetTargets,
|
||||||
TextSnakeTargets)
|
TextSnakeTargets)
|
||||||
|
@ -26,5 +27,6 @@ __all__ = [
|
||||||
'sort_vertex8', 'FCENetTargets', 'TextDetRandomCropFlip', 'NerTransform',
|
'sort_vertex8', 'FCENetTargets', 'TextDetRandomCropFlip', 'NerTransform',
|
||||||
'ToTensorNER', 'ResizeNoImg', 'PyramidRescale', 'TorchVisionWrapper',
|
'ToTensorNER', 'ResizeNoImg', 'PyramidRescale', 'TorchVisionWrapper',
|
||||||
'Resize', 'RandomCrop', 'TextDetRandomCrop', 'RandomCrop',
|
'Resize', 'RandomCrop', 'TextDetRandomCrop', 'RandomCrop',
|
||||||
'PackTextDetInputs', 'PackTextRecogInputs', 'RescaleToHeight', 'PadToWidth'
|
'PackTextDetInputs', 'PackTextRecogInputs', 'RescaleToHeight',
|
||||||
|
'PadToWidth', 'ShortScaleAspectJitter'
|
||||||
]
|
]
|
||||||
|
|
|
@ -1357,3 +1357,100 @@ class SourceImagePad(BaseTransform):
|
||||||
repr_str += f'(target_scale = {self.target_scale}, '
|
repr_str += f'(target_scale = {self.target_scale}, '
|
||||||
repr_str += f'crop_ratio = {self.crop_ratio})'
|
repr_str += f'crop_ratio = {self.crop_ratio})'
|
||||||
return repr_str
|
return repr_str
|
||||||
|
|
||||||
|
|
||||||
|
@TRANSFORMS.register_module()
|
||||||
|
@avoid_cache_randomness
|
||||||
|
class ShortScaleAspectJitter(BaseTransform):
|
||||||
|
"""First rescale the image for its shorter side to reach the short_size and
|
||||||
|
then jitter its aspect ratio, final rescale the shape guaranteed to be
|
||||||
|
divided by scale_divisor.
|
||||||
|
|
||||||
|
Required Keys:
|
||||||
|
|
||||||
|
- img
|
||||||
|
- img_shape
|
||||||
|
- gt_bboxes (optional)
|
||||||
|
- gt_polygons (optional)
|
||||||
|
|
||||||
|
|
||||||
|
Modified Keys:
|
||||||
|
|
||||||
|
- img
|
||||||
|
- img_shape
|
||||||
|
- gt_bboxes (optional)
|
||||||
|
- gt_polygons (optional)
|
||||||
|
|
||||||
|
Added Keys:
|
||||||
|
|
||||||
|
- scale
|
||||||
|
- scale_factor
|
||||||
|
- keep_ratio
|
||||||
|
|
||||||
|
Args:
|
||||||
|
short_size (int): Target shorter size before jittering the aspect
|
||||||
|
ratio. Defaults to 736.
|
||||||
|
short_size_jitter_range (tuple(float, float)): Range of the ratio used
|
||||||
|
to jitter the target shorter size. Defaults to (0.7, 1.3).
|
||||||
|
aspect_ratio_jitter_range (tuple(float, float)): Range of the ratio
|
||||||
|
used to jitter its aspect ratio. Defaults to (0.9, 1.1).
|
||||||
|
scale_divisor (int): The scale divisor. Defaults to 1.
|
||||||
|
resize_cfg (dict): (dict): Config to construct the Resize transform.
|
||||||
|
Refer to ``Resize`` for detail. Defaults to
|
||||||
|
``dict(type='Resize')``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
short_size: int = 736,
|
||||||
|
ratio_range: Tuple[float, float] = (0.7, 1.3),
|
||||||
|
aspect_ratio_range: Tuple[float, float] = (0.9, 1.1),
|
||||||
|
scale_divisor: int = 1,
|
||||||
|
resize_cfg: Dict = dict(type='Resize')) -> None:
|
||||||
|
super().__init__()
|
||||||
|
self.short_size = short_size
|
||||||
|
self.ratio_range = ratio_range
|
||||||
|
self.aspect_ratio_range = aspect_ratio_range
|
||||||
|
self.resize_cfg = resize_cfg
|
||||||
|
# create a empty Reisize object
|
||||||
|
resize_cfg.update(dict(scale=0))
|
||||||
|
self.resize = TRANSFORMS.build(resize_cfg)
|
||||||
|
self.scale_divisor = scale_divisor
|
||||||
|
|
||||||
|
def _sample_from_range(self, range: Tuple[float, float]) -> float:
|
||||||
|
"""A ratio will be randomly sampled from the range specified by
|
||||||
|
``range``.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ratio_range (tuple[float]): The minimum and maximum ratio.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float: A ratio randomly sampled from the range.
|
||||||
|
"""
|
||||||
|
min_value, max_value = min(range), max(range)
|
||||||
|
value = np.random.random_sample() * (max_value - min_value) + min_value
|
||||||
|
return value
|
||||||
|
|
||||||
|
def transform(self, results: Dict) -> Dict:
|
||||||
|
h, w = results['img'].shape[:2]
|
||||||
|
ratio = self._sample_from_range(self.ratio_range)
|
||||||
|
scale = (ratio * self.short_size) / min(h, w)
|
||||||
|
|
||||||
|
aspect = self._sample_from_range(self.aspect_ratio_range)
|
||||||
|
h_scale = scale * math.sqrt(aspect)
|
||||||
|
w_scale = scale / math.sqrt(aspect)
|
||||||
|
new_h = round(h * h_scale)
|
||||||
|
new_w = round(w * w_scale)
|
||||||
|
|
||||||
|
new_h = math.ceil(new_h / self.scale_divisor) * self.scale_divisor
|
||||||
|
new_w = math.ceil(new_w / self.scale_divisor) * self.scale_divisor
|
||||||
|
self.resize.scale = (new_w, new_h)
|
||||||
|
return self.resize(results)
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
repr_str = self.__class__.__name__
|
||||||
|
repr_str += f'(short_size = {self.short_size}, '
|
||||||
|
repr_str += f'ratio_range = {self.ratio_range}, '
|
||||||
|
repr_str += f'aspect_ratio_range = {self.aspect_ratio_range}, '
|
||||||
|
repr_str += f'scale_divisor = {self.scale_divisor}, '
|
||||||
|
repr_str += f'resize_cfg = {self.resize_cfg})'
|
||||||
|
return repr_str
|
||||||
|
|
|
@ -8,8 +8,8 @@ from mmcv.transforms import Pad, RandomResize
|
||||||
|
|
||||||
from mmocr.datasets.pipelines import (PadToWidth, PyramidRescale, RandomCrop,
|
from mmocr.datasets.pipelines import (PadToWidth, PyramidRescale, RandomCrop,
|
||||||
RandomRotate, RescaleToHeight, Resize,
|
RandomRotate, RescaleToHeight, Resize,
|
||||||
SourceImagePad, TextDetRandomCrop,
|
ShortScaleAspectJitter, SourceImagePad,
|
||||||
TextDetRandomCropFlip)
|
TextDetRandomCrop, TextDetRandomCropFlip)
|
||||||
from mmocr.utils import bbox2poly, poly2shapely
|
from mmocr.utils import bbox2poly, poly2shapely
|
||||||
|
|
||||||
|
|
||||||
|
@ -631,3 +631,39 @@ class TestSourceImagePad(unittest.TestCase):
|
||||||
repr(transform),
|
repr(transform),
|
||||||
('SourceImagePad(target_scale = (30, 30), crop_ratio = (0.1, 0.1))'
|
('SourceImagePad(target_scale = (30, 30), crop_ratio = (0.1, 0.1))'
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
|
class TestShortScaleAspectJitter(unittest.TestCase):
|
||||||
|
|
||||||
|
@mock.patch('mmocr.datasets.pipelines.processing.np.random.random_sample')
|
||||||
|
def test_transform(self, mock_random):
|
||||||
|
ratio_range = (0.5, 1.5)
|
||||||
|
aspect_ratio_range = (0.9, 1.1)
|
||||||
|
mock_random.side_effect = [0.5, 0.5]
|
||||||
|
img = np.zeros((15, 20, 3))
|
||||||
|
polygon = [np.array([10., 5., 20., 5., 20., 10., 10., 10.])]
|
||||||
|
bbox = np.array([[10., 5., 20., 10.]])
|
||||||
|
data_info = dict(img=img, gt_polygons=polygon, gt_bboxes=bbox)
|
||||||
|
t = ShortScaleAspectJitter(
|
||||||
|
short_size=40,
|
||||||
|
ratio_range=ratio_range,
|
||||||
|
aspect_ratio_range=aspect_ratio_range,
|
||||||
|
scale_divisor=4)
|
||||||
|
results = t(data_info)
|
||||||
|
self.assertEqual(results['img'].shape, (40, 56, 3))
|
||||||
|
self.assertEqual(results['img_shape'], (40, 56))
|
||||||
|
|
||||||
|
def test_repr(self):
|
||||||
|
transform = ShortScaleAspectJitter(
|
||||||
|
short_size=40,
|
||||||
|
ratio_range=(0.5, 1.5),
|
||||||
|
aspect_ratio_range=(0.9, 1.1),
|
||||||
|
scale_divisor=4,
|
||||||
|
resize_cfg=dict(type='Resize'))
|
||||||
|
self.assertEqual(
|
||||||
|
repr(transform), ('ShortScaleAspectJitter('
|
||||||
|
'short_size = 40, '
|
||||||
|
'ratio_range = (0.5, 1.5), '
|
||||||
|
'aspect_ratio_range = (0.9, 1.1), '
|
||||||
|
'scale_divisor = 4, '
|
||||||
|
"resize_cfg = {'type': 'Resize', 'scale': 0})"))
|
||||||
|
|
Loading…
Reference in New Issue