mirror of https://github.com/open-mmlab/mmyolo.git
Add dataset transforms (#1)
parent
cfec074cf6
commit
e4359a15a3
|
@ -0,0 +1,8 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
# Re-export the transform classes defined in ``transforms.py`` so they can
# be imported directly from this package.
from .transforms import (LetterResize, LoadAnnotations, YOLOv5HSVRandomAug,
                         YOLOv5KeepRatioResize, YOLOv5RandomAffine)

# Public API of this package.
__all__ = [
    'YOLOv5KeepRatioResize', 'LetterResize', 'YOLOv5HSVRandomAug',
    'LoadAnnotations', 'YOLOv5RandomAffine'
]
|
|
@ -0,0 +1,597 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import math
|
||||
from typing import Tuple, Union
|
||||
|
||||
import cv2
|
||||
import mmcv
|
||||
import numpy as np
|
||||
import torch
|
||||
from mmcv.transforms import BaseTransform
|
||||
from mmcv.transforms.utils import cache_randomness
|
||||
from numpy import random
|
||||
|
||||
from mmdet.datasets.transforms import LoadAnnotations as MMDET_LoadAnnotations
|
||||
from mmdet.datasets.transforms import Resize as MMDET_Resize
|
||||
from mmdet.structures.bbox import autocast_box_type, get_box_type
|
||||
from mmyolo.registry import TRANSFORMS
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class YOLOv5KeepRatioResize(MMDET_Resize):
    """Resize images & bbox(if existed).

    This transform resizes the input image according to ``scale``.
    Bboxes (if existed) are then resized with the same scale factor.

    Required Keys:

    - img (np.uint8)
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)

    Modified Keys:

    - img (np.uint8)
    - img_shape (tuple)
    - gt_bboxes (optional)
    - scale (float)

    Added Keys:

    - scale_factor (np.float32)

    Args:
        scale (Union[int, Tuple[int, int]]): Images scales for resizing.
    """

    def __init__(self,
                 scale: Union[int, Tuple[int, int]],
                 keep_ratio: bool = True,
                 **kwargs) -> None:
        # This transform only supports aspect-ratio-preserving resize.
        assert keep_ratio is True
        super().__init__(scale=scale, keep_ratio=True, **kwargs)

    @staticmethod
    def _get_rescale_ratio(old_size, scale) -> float:
        """Calculate the rescale ratio.

        Args:
            old_size (tuple[int]): The old size of the image. Note that the
                computation below only uses ``max``/``min`` of the two
                entries, so the (w, h) vs (h, w) ordering does not matter.
            scale (float | tuple[int]): The scaling factor or maximum size.
                If it is a float number, then the image will be rescaled by
                this factor, else if it is a tuple of 2 integers, then
                the image will be rescaled as large as possible within
                the scale.

        Returns:
            float: The resize ratio.
        """
        w, h = old_size
        if isinstance(scale, (float, int)):
            if scale <= 0:
                raise ValueError(f'Invalid scale {scale}, must be positive.')
            return scale
        if isinstance(scale, tuple):
            # Fit the long and short edges independently and keep the
            # tighter constraint.
            long_edge, short_edge = max(scale), min(scale)
            return min(long_edge / max(h, w), short_edge / min(h, w))
        raise TypeError('Scale must be a number or tuple of int, '
                        f'but got {type(scale)}')

    def _resize_img(self, results: dict) -> None:
        """Resize images with ``results['scale']``."""
        assert self.keep_ratio is True

        img = results.get('img', None)
        if img is None:
            return

        src_h, src_w = img.shape[:2]
        rescale_ratio = self._get_rescale_ratio((src_h, src_w), self.scale)

        if rescale_ratio != 1:
            # 'area' interpolation when shrinking, 'bilinear' when enlarging.
            img = mmcv.imrescale(
                img=img,
                scale=rescale_ratio,
                interpolation='area' if rescale_ratio < 1 else 'bilinear',
                backend=self.backend)

        # Recompute the ratio actually achieved after integer rounding.
        actual_ratio = img.shape[0] / src_h

        results['img'] = img
        results['img_shape'] = img.shape[:2]
        results['scale_factor'] = np.array([actual_ratio, actual_ratio],
                                           dtype=np.float32)
|
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class LetterResize(MMDET_Resize):
    """Resize and pad image while meeting stride-multiple constraints.

    Required Keys:

    - img (np.uint8)
    - batch_shape (np.int64) (optional)

    Modified Keys:

    - img (np.uint8)
    - img_shape (tuple)
    - gt_bboxes (optional)

    Added Keys:
    - pad_param (np.float32)

    Args:
        scale (Union[int, Tuple[int, int]]): Images scales for resizing.
        pad_val (dict): Padding value. Defaults to dict(img=0, seg=255).
        use_mini_pad (bool): Whether using minimum rectangle padding.
            Defaults to False.
        stretch_only (bool): Whether stretch to the specified size directly.
            Defaults to False.
        allow_scale_up (bool): Allow scale up when ratio > 1. Defaults to True.
    """

    def __init__(self,
                 scale: Union[int, Tuple[int, int]],
                 pad_val: dict = dict(img=0, mask=0, seg=255),
                 use_mini_pad: bool = False,
                 stretch_only: bool = False,
                 allow_scale_up: bool = True,
                 **kwargs) -> None:
        super().__init__(scale=scale, keep_ratio=True, **kwargs)

        # Normalize a scalar pad value into dict form *before* storing it.
        # The previous code stored the raw value first, so a scalar
        # ``pad_val`` crashed later in ``_resize_img`` on
        # ``self.pad_val.get('img', 0)``.
        if isinstance(pad_val, (int, float)):
            pad_val = dict(img=pad_val, seg=255)
        assert isinstance(
            pad_val, dict), f'pad_val must be dict, but got {type(pad_val)}'
        self.pad_val = pad_val

        self.use_mini_pad = use_mini_pad
        self.stretch_only = stretch_only
        self.allow_scale_up = allow_scale_up
        # Per-image padding value actually handed to ``mmcv.impad``
        # (tuple per channel for color images).
        self.padded_val = None

    def _resize_img(self, results: dict) -> None:
        """Resize images with ``results['scale']``."""
        image = results.get('img', None)
        if image is None:
            return

        # Use batch_shape if a batch_shape policy is configured.
        # NOTE(review): this mutates ``self.scale`` per sample, making the
        # transform stateful — confirm instances are not shared across
        # concurrent workers.
        if 'batch_shape' in results:
            self.scale = tuple(results['batch_shape'])

        image_shape = image.shape[:2]  # height, width

        # Scale ratio (new / old)
        ratio = min(self.scale[0] / image_shape[0],
                    self.scale[1] / image_shape[1])

        # only scale down, do not scale up (for better test mAP)
        if not self.allow_scale_up:
            ratio = min(ratio, 1.0)

        ratio = [ratio, ratio]  # float -> (float, float) for (height, width)

        # compute the best size of the image, (height, width) order
        no_pad_shape = (int(round(image_shape[0] * ratio[0])),
                        int(round(image_shape[1] * ratio[1])))

        # padding height & width
        padding_h, padding_w = [
            self.scale[0] - no_pad_shape[0], self.scale[1] - no_pad_shape[1]
        ]
        if self.use_mini_pad:
            # minimum rectangle padding: pad only up to the next multiple
            # of the (assumed) 32-pixel stride.
            padding_w, padding_h = np.mod(padding_w, 32), np.mod(padding_h, 32)

        elif self.stretch_only:
            # stretch to the specified size directly, no padding at all
            padding_h, padding_w = 0.0, 0.0
            no_pad_shape = (self.scale[0], self.scale[1])
            ratio = [
                self.scale[0] / image_shape[0], self.scale[1] / image_shape[1]
            ]  # height, width ratios

        # divide padding into 2 sides
        padding_h /= 2
        padding_w /= 2

        # Fix: both ``image_shape`` and ``no_pad_shape`` are (height, width);
        # the previous code compared ``image_shape[::-1]`` against
        # ``no_pad_shape`` so non-square images never compared equal, and
        # then passed the target *shape* to ``mmcv.imrescale``, which
        # interprets its argument as a scale constraint. Resize to the
        # exact target instead.
        if image_shape != no_pad_shape:
            # mmcv.imresize expects (width, height)
            image = mmcv.imresize(
                image, (no_pad_shape[1], no_pad_shape[0]),
                interpolation=self.interpolation,
                backend=self.backend)

        scale_factor = np.array([ratio[0], ratio[1]], dtype=np.float32)

        # Compose with any scale factor from an earlier resize step.
        if 'scale_factor' in results:
            results['scale_factor'] = results['scale_factor'] * scale_factor
        else:
            results['scale_factor'] = scale_factor

        # padding: split each axis into two sides. The -0.1/+0.1 nudge
        # makes an odd total padding put the extra pixel on bottom/right.
        top_padding, bottom_padding = int(round(padding_h - 0.1)), int(
            round(padding_h + 0.1))
        left_padding, right_padding = int(round(padding_w - 0.1)), int(
            round(padding_w + 0.1))

        padding_list = [
            top_padding, bottom_padding, left_padding, right_padding
        ]
        if top_padding != 0 or bottom_padding != 0 or \
                left_padding != 0 or right_padding != 0:

            pad_val = self.pad_val.get('img', 0)
            if isinstance(pad_val, int) and image.ndim == 3:
                # Broadcast a scalar pad value over all channels.
                self.padded_val = tuple(pad_val for _ in range(image.shape[2]))
            else:
                self.padded_val = pad_val

            # mmcv.impad padding order is (left, top, right, bottom).
            image = mmcv.impad(
                img=image,
                padding=(padding_list[2], padding_list[0], padding_list[3],
                         padding_list[1]),
                pad_val=self.padded_val,
                padding_mode='constant')

        results['img'] = image
        results['img_shape'] = image.shape
        # pad_param records (top, bottom, left, right) for bbox/mask shift.
        results['pad_param'] = np.array(padding_list, dtype=np.float32)

    def _resize_masks(self, results: dict) -> None:
        """Resize masks with ``results['scale']``"""
        if results.get('gt_masks', None) is None:
            return

        # resize the gt_masks with the same factors as the image
        gt_mask_height = results['gt_masks'].height * \
            results['scale_factor'][0]
        gt_mask_width = results['gt_masks'].width * \
            results['scale_factor'][1]
        gt_masks = results['gt_masks'].rescale((gt_mask_height, gt_mask_width))

        # padding the gt_masks to match the padded image
        if len(gt_masks) == 0:
            padded_masks = np.empty((0, *results['img_shape'][:2]),
                                    dtype=np.uint8)
        else:
            # TODO: The function is incorrect. Because the mask may not
            # be able to pad.
            padded_masks = np.stack([
                mmcv.impad(
                    mask,
                    # (left, top, right, bottom) from pad_param
                    padding=(int(results['pad_param'][2]),
                             int(results['pad_param'][0]),
                             int(results['pad_param'][3]),
                             int(results['pad_param'][1])),
                    pad_val=self.pad_val.get('masks', 0)) for mask in gt_masks
            ])
        results['gt_masks'] = type(results['gt_masks'])(
            padded_masks, *results['img_shape'][:2])

    def _resize_bboxes(self, results: dict) -> None:
        """Resize bounding boxes with ``results['scale_factor']``."""
        if results.get('gt_bboxes', None) is None:
            return
        results['gt_bboxes'].rescale_(results['scale_factor'])

        if len(results['pad_param']) != 4:
            return
        # pad_param is (top, bottom, left, right); boxes shift by the
        # (left, top) offsets. Fix: the y offset previously used the bottom
        # padding (index 1) instead of the top padding (index 0), shifting
        # boxes by one pixel whenever the total padding was odd.
        results['gt_bboxes'].translate_(
            (results['pad_param'][2], results['pad_param'][0]))

        if self.clip_object_border:
            results['gt_bboxes'].clip_(results['img_shape'])
|
||||
|
||||
|
||||
# TODO: Check if it can be merged with mmdet.YOLOXHSVRandomAug
@TRANSFORMS.register_module()
class YOLOv5HSVRandomAug(BaseTransform):
    """Apply HSV augmentation to image sequentially.

    Required Keys:

    - img

    Modified Keys:

    - img

    Args:
        hue_delta ([int, float]): delta of hue. Defaults to 0.015.
        saturation_delta ([int, float]): delta of saturation. Defaults to 0.7.
        value_delta ([int, float]): delta of value. Defaults to 0.4.
    """

    def __init__(self,
                 hue_delta: Union[int, float] = 0.015,
                 saturation_delta: Union[int, float] = 0.7,
                 value_delta: Union[int, float] = 0.4):
        self.hue_delta = hue_delta
        self.saturation_delta = saturation_delta
        self.value_delta = value_delta

    def transform(self, results: dict) -> dict:
        """Randomly jitter hue, saturation and value of ``results['img']``
        via per-channel lookup tables."""
        # One random multiplicative gain per channel, centered at 1.
        deltas = [self.hue_delta, self.saturation_delta, self.value_delta]
        gains = random.uniform(-1, 1, 3) * deltas + 1

        hue, sat, val = cv2.split(
            cv2.cvtColor(results['img'], cv2.COLOR_BGR2HSV))

        # Build one LUT per channel; hue wraps at 180 (OpenCV convention),
        # saturation and value clip to [0, 255].
        base = np.arange(0, 256, dtype=gains.dtype)
        lut_hue = ((base * gains[0]) % 180).astype(np.uint8)
        lut_sat = np.clip(base * gains[1], 0, 255).astype(np.uint8)
        lut_val = np.clip(base * gains[2], 0, 255).astype(np.uint8)

        img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat),
                             cv2.LUT(val, lut_val)))
        results['img'] = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)
        return results
|
||||
|
||||
|
||||
# TODO: can be accelerated
@TRANSFORMS.register_module()
class LoadAnnotations(MMDET_LoadAnnotations):
    """Because the yolo series does not need to consider ignore bboxes for the
    time being, in order to speed up the pipeline, it can be excluded in
    advance."""

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Note: BBoxes with ignore_flag of 1 is not considered.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded bounding box annotations.
        """
        # Keep only non-ignored instances.
        kept = [
            instance for instance in results['instances']
            if instance['ignore_flag'] == 0
        ]
        gt_bboxes = [instance['bbox'] for instance in kept]
        # All kept flags are 0 by construction, but keep the array so that
        # downstream transforms see the usual key.
        results['gt_ignore_flags'] = np.array(
            [instance['ignore_flag'] for instance in kept], dtype=bool)

        if self.box_type is None:
            results['gt_bboxes'] = np.array(
                gt_bboxes, dtype=np.float32).reshape((-1, 4))
        else:
            _, box_type_cls = get_box_type(self.box_type)
            results['gt_bboxes'] = box_type_cls(gt_bboxes, dtype=torch.float32)

    def _load_labels(self, results: dict) -> None:
        """Private function to load label annotations.

        Note: BBoxes with ignore_flag of 1 is not considered.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The dict contains loaded label annotations.
        """
        labels = [
            instance['bbox_label'] for instance in results['instances']
            if instance['ignore_flag'] == 0
        ]
        results['gt_bboxes_labels'] = np.array(labels, dtype=np.int64)
||||
|
||||
|
||||
@TRANSFORMS.register_module()
class YOLOv5RandomAffine(BaseTransform):
    """Random affine transform data augmentation in YOLOv5. It is different
    from the implementation in YOLOX.

    This operation randomly generates affine transform matrix which including
    rotation, translation, shear and scaling transforms.

    Required Keys:

    - img
    - gt_bboxes (BaseBoxes[torch.float32]) (optional)
    - gt_bboxes_labels (np.int64) (optional)
    - gt_ignore_flags (np.bool) (optional)

    Modified Keys:

    - img
    - img_shape
    - gt_bboxes (optional)
    - gt_bboxes_labels (optional)
    - gt_ignore_flags (optional)

    Args:
        max_rotate_degree (float): Maximum degrees of rotation transform.
            Defaults to 10.
        max_translate_ratio (float): Maximum ratio of translation.
            Defaults to 0.1.
        scaling_ratio_range (tuple[float]): Min and max ratio of
            scaling transform. Defaults to (0.5, 1.5).
        max_shear_degree (float): Maximum degrees of shear
            transform. Defaults to 2.
        border (tuple[int]): Distance from height and width sides of input
            image to adjust output shape. Only used in mosaic dataset.
            Defaults to (0, 0).
        border_val (tuple[int]): Border padding values of 3 channels.
            Defaults to (114, 114, 114).
        bbox_clip_border (bool, optional): Whether to clip the objects outside
            the border of the image. In some dataset like MOT17, the gt bboxes
            are allowed to cross the border of images. Therefore, we don't
            need to clip the gt bboxes in these cases. Defaults to True.
        min_bbox_size (float): Width and height threshold for filtering
            warped bboxes. Defaults to 2.
        min_area_ratio (float): Threshold of warped-to-original bbox area
            ratio for filtering bboxes. Defaults to 0.1.
        max_aspect_ratio (float): Aspect ratio (w/h or h/w) threshold for
            filtering warped bboxes. Defaults to 20.
    """

    def __init__(self,
                 max_rotate_degree: float = 10.0,
                 max_translate_ratio: float = 0.1,
                 scaling_ratio_range: Tuple[float, float] = (0.5, 1.5),
                 max_shear_degree: float = 2.0,
                 border: Tuple[int, int] = (0, 0),
                 border_val: Tuple[int, int, int] = (114, 114, 114),
                 bbox_clip_border: bool = True,
                 min_bbox_size: float = 2,
                 min_area_ratio: float = 0.1,
                 max_aspect_ratio: float = 20) -> None:
        assert 0 <= max_translate_ratio <= 1
        assert scaling_ratio_range[0] <= scaling_ratio_range[1]
        assert scaling_ratio_range[0] > 0
        self.max_rotate_degree = max_rotate_degree
        self.max_translate_ratio = max_translate_ratio
        self.scaling_ratio_range = scaling_ratio_range
        self.max_shear_degree = max_shear_degree
        self.border = border
        self.border_val = border_val
        self.bbox_clip_border = bbox_clip_border
        # Fix: this assignment was duplicated in the original code.
        self.min_bbox_size = min_bbox_size
        self.min_area_ratio = min_area_ratio
        self.max_aspect_ratio = max_aspect_ratio

    @cache_randomness
    def _get_random_homography_matrix(self, height, width):
        """Sample a random affine matrix (as a 3x3 homography) together with
        the sampled scaling ratio, which is needed later for bbox filtering."""
        # Rotation
        rotation_degree = random.uniform(-self.max_rotate_degree,
                                         self.max_rotate_degree)
        rotation_matrix = self._get_rotation_matrix(rotation_degree)

        # Scaling
        scaling_ratio = random.uniform(self.scaling_ratio_range[0],
                                       self.scaling_ratio_range[1])
        scaling_matrix = self._get_scaling_matrix(scaling_ratio)

        # Shear
        x_degree = random.uniform(-self.max_shear_degree,
                                  self.max_shear_degree)
        y_degree = random.uniform(-self.max_shear_degree,
                                  self.max_shear_degree)
        shear_matrix = self._get_shear_matrix(x_degree, y_degree)

        # Translation, uniform around the output center
        trans_x = random.uniform(0.5 - self.max_translate_ratio,
                                 0.5 + self.max_translate_ratio) * width
        trans_y = random.uniform(0.5 - self.max_translate_ratio,
                                 0.5 + self.max_translate_ratio) * height
        translate_matrix = self._get_translation_matrix(trans_x, trans_y)
        warp_matrix = (
            translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix)
        return warp_matrix, scaling_ratio

    @autocast_box_type()
    def transform(self, results: dict) -> dict:
        """Warp image and bboxes with a random affine matrix, then filter
        degenerate bboxes."""
        img = results['img']
        # Output canvas can be larger than the input (mosaic border).
        height = img.shape[0] + self.border[0] * 2
        width = img.shape[1] + self.border[1] * 2

        # Note: Different from YOLOX. Shift the origin to the image center
        # before applying the random transform.
        center_matrix = np.eye(3, dtype=np.float32)
        center_matrix[0, 2] = -img.shape[1] / 2
        center_matrix[1, 2] = -img.shape[0] / 2

        warp_matrix, scaling_ratio = self._get_random_homography_matrix(
            height, width)
        warp_matrix = warp_matrix @ center_matrix

        img = cv2.warpPerspective(
            img,
            warp_matrix,
            dsize=(width, height),
            borderValue=self.border_val)
        results['img'] = img
        results['img_shape'] = img.shape

        bboxes = results['gt_bboxes']
        num_bboxes = len(bboxes)
        if num_bboxes:
            orig_bboxes = bboxes.clone()

            bboxes.project_(warp_matrix)
            if self.bbox_clip_border:
                bboxes.clip_([height, width])

            # filter bboxes: compare against the originals rescaled by the
            # sampled scaling ratio so area shrinkage from clipping (not
            # from scaling) is what gets filtered.
            orig_bboxes.rescale_([scaling_ratio, scaling_ratio])

            # Be careful: valid_index must convert to numpy,
            # otherwise it will raise out of bounds when len(valid_index)=1
            valid_index = self.filter_gt_bboxes(orig_bboxes, bboxes).numpy()
            results['gt_bboxes'] = bboxes[valid_index]
            results['gt_bboxes_labels'] = results['gt_bboxes_labels'][
                valid_index]
            results['gt_ignore_flags'] = results['gt_ignore_flags'][
                valid_index]

        if 'gt_masks' in results:
            raise NotImplementedError('RandomAffine only supports bbox.')
        return results

    def filter_gt_bboxes(self, origin_bboxes, wrapped_bboxes):
        """Return a boolean mask of bboxes that survive the warp: large
        enough, not overly shrunk relative to the originals, and not too
        elongated."""
        origin_w = origin_bboxes.widths
        origin_h = origin_bboxes.heights
        wrapped_w = wrapped_bboxes.widths
        wrapped_h = wrapped_bboxes.heights
        # 1e-16 guards against division by zero for degenerate boxes.
        aspect_ratio = np.maximum(wrapped_w / (wrapped_h + 1e-16),
                                  wrapped_h / (wrapped_w + 1e-16))

        wh_valid_idx = (wrapped_w > self.min_bbox_size) & \
                       (wrapped_h > self.min_bbox_size)
        area_valid_idx = wrapped_w * wrapped_h / (origin_w * origin_h +
                                                  1e-16) > self.min_area_ratio
        aspect_ratio_valid_idx = aspect_ratio < self.max_aspect_ratio
        return wh_valid_idx & area_valid_idx & aspect_ratio_valid_idx

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(max_rotate_degree={self.max_rotate_degree}, '
        repr_str += f'max_translate_ratio={self.max_translate_ratio}, '
        repr_str += f'scaling_ratio_range={self.scaling_ratio_range}, '
        repr_str += f'max_shear_degree={self.max_shear_degree}, '
        repr_str += f'border={self.border}, '
        repr_str += f'border_val={self.border_val}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
        return repr_str

    @staticmethod
    def _get_rotation_matrix(rotate_degrees: float) -> np.ndarray:
        """Homogeneous 3x3 rotation matrix for ``rotate_degrees`` degrees."""
        radian = math.radians(rotate_degrees)
        rotation_matrix = np.array(
            [[np.cos(radian), -np.sin(radian), 0.],
             [np.sin(radian), np.cos(radian), 0.], [0., 0., 1.]],
            dtype=np.float32)
        return rotation_matrix

    @staticmethod
    def _get_scaling_matrix(scale_ratio: float) -> np.ndarray:
        """Homogeneous 3x3 uniform scaling matrix."""
        scaling_matrix = np.array(
            [[scale_ratio, 0., 0.], [0., scale_ratio, 0.], [0., 0., 1.]],
            dtype=np.float32)
        return scaling_matrix

    @staticmethod
    def _get_shear_matrix(x_shear_degrees: float,
                          y_shear_degrees: float) -> np.ndarray:
        """Homogeneous 3x3 shear matrix from per-axis shear angles."""
        x_radian = math.radians(x_shear_degrees)
        y_radian = math.radians(y_shear_degrees)
        shear_matrix = np.array([[1, np.tan(x_radian), 0.],
                                 [np.tan(y_radian), 1, 0.], [0., 0., 1.]],
                                dtype=np.float32)
        return shear_matrix

    @staticmethod
    def _get_translation_matrix(x: float, y: float) -> np.ndarray:
        """Homogeneous 3x3 translation matrix by (x, y) pixels."""
        translation_matrix = np.array([[1, 0., x], [0., 1, y], [0., 0., 1.]],
                                      dtype=np.float32)
        return translation_matrix
|
Loading…
Reference in New Issue