2023-02-08 15:52:30 +08:00
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
# @Time : 2019/8/23 21:52
|
|
|
|
|
# @Author : zhoujun
|
|
|
|
|
|
|
|
|
|
import math
|
|
|
|
|
import numbers
|
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
import cv2
|
|
|
|
|
import numpy as np
|
|
|
|
|
from skimage.util import random_noise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RandomNoise:
    """Randomly add Gaussian noise to the image of a sample."""

    def __init__(self, random_rate):
        """
        :param random_rate: threshold compared against ``random.random()``;
            the sample is returned untouched whenever the draw exceeds it
        """
        self.random_rate = random_rate

    def __call__(self, data: dict):
        """
        Add noise to the image.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict; 'img' is replaced when the noise was applied
        """
        skip = random.random() > self.random_rate
        if not skip:
            image = data["img"]
            # random_noise output is multiplied by 255 and cast back to the
            # dtype the image had on entry.
            noisy = random_noise(image, mode="gaussian", clip=True) * 255
            data["img"] = noisy.astype(image.dtype)
        return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RandomScale:
    """Randomly rescale the image and its text polygons by a factor drawn from ``scales``."""

    def __init__(self, scales, random_rate):
        """
        :param scales: candidate scale factors; one is picked with ``np.random.choice``
        :param random_rate: threshold compared against ``random.random()``;
            the sample is returned untouched whenever the draw exceeds it
        :return:
        """
        self.random_rate = random_rate
        self.scales = scales

    def __call__(self, data: dict) -> dict:
        """
        Pick one scale from ``scales`` and resize the image and the text polygons with it.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced
        """
        if random.random() > self.random_rate:
            return data
        im = data["img"]
        text_polys = data["text_polys"]

        rd_scale = float(np.random.choice(self.scales))
        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
        # Multiply out of place: an in-place ``*=`` with a float factor raises
        # a casting error on integer arrays, and this also leaves the caller's
        # array untouched (float arrays keep their dtype).
        scaled_polys = np.asarray(text_polys) * rd_scale

        data["img"] = im
        data["text_polys"] = scaled_polys
        return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RandomRotateImgBox:
    """Randomly rotate the image and apply the same affine transform to the text polygons."""

    def __init__(self, degrees, random_rate, same_size=False):
        """
        :param degrees: rotation range; a single non-negative number ``d`` means
            ``(-d, d)``, a list/tuple/ndarray must hold exactly two bounds
        :param random_rate: threshold compared against ``random.random()``;
            the sample is returned untouched whenever the draw exceeds it
        :param same_size: keep the output canvas the same size as the input image
        :raises ValueError: negative single number, or a sequence whose length is not 2
        :raises TypeError: ``degrees`` is neither a number nor a list/tuple/ndarray
        :return:
        """
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be positive.")
            degrees = (-degrees, degrees)
        elif isinstance(degrees, (list, tuple, np.ndarray)):
            if len(degrees) != 2:
                raise ValueError("If degrees is a sequence, it must be of len 2.")
        else:
            # TypeError is a subclass of Exception, so callers catching the
            # previously raised generic Exception keep working.
            raise TypeError("degrees must in Number or list or tuple or np.ndarray")
        self.degrees = degrees
        self.same_size = same_size
        self.random_rate = random_rate

    @staticmethod
    def _rotate_polys(text_polys, rot_mat):
        """Apply the 2x3 affine matrix ``rot_mat`` to every (x, y) vertex of ``text_polys``."""
        polys = np.array(text_polys, dtype=np.float64)
        if polys.size == 0:
            # Nothing to transform; keep whatever (empty) shape came in.
            return polys
        # [x, y] @ R^T + t is the same as rot_mat @ [x, y, 1] for each vertex,
        # but covers any number of vertices per polygon in one shot.
        return polys[..., :2] @ rot_mat[:, :2].T + rot_mat[:, 2]

    def __call__(self, data: dict) -> dict:
        """
        Rotate the image by an angle drawn uniformly from ``degrees`` and move
        the text polygons into the rotated coordinate system.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced
        """
        if random.random() > self.random_rate:
            return data
        im = data["img"]
        text_polys = data["text_polys"]

        # ---------------------- rotate the image ----------------------
        w = im.shape[1]
        h = im.shape[0]
        angle = np.random.uniform(self.degrees[0], self.degrees[1])

        if self.same_size:
            nw = w
            nh = h
        else:
            # degrees -> radians
            rangle = np.deg2rad(angle)
            # width / height of the canvas after rotation
            nw = abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)
            nh = abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)
        # build the affine matrix around the centre of the new canvas
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, 1)
        # offset from the original image centre to the new canvas centre
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        # fold that offset into the translation part of the matrix
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # affine warp
        rot_img = cv2.warpAffine(
            im,
            rot_mat,
            (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4,
        )

        # ---------------------- fix up polygon coordinates ----------------------
        # rot_mat is the final transform; every vertex of every polygon is
        # mapped through it.
        data["img"] = rot_img
        data["text_polys"] = self._rotate_polys(text_polys, rot_mat)
        return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RandomResize:
    """Randomly resize the image to a fixed size and scale the text polygons to match."""

    def __init__(self, size, random_rate, keep_ratio=False):
        """
        :param size: target size; a single number ``s`` means ``(s, s)``,
            a list/tuple/ndarray is read as ``[w, h]``
        :param random_rate: threshold compared against ``random.random()``;
            the sample is returned untouched whenever the draw exceeds it
        :param keep_ratio: zero-pad the image up to the target size before resizing
        :raises ValueError: negative single number, or a sequence whose length is not 2
        :raises TypeError: ``size`` is neither a number nor a list/tuple/ndarray
        :return:
        """
        if isinstance(size, numbers.Number):
            if size < 0:
                raise ValueError(
                    "If input_size is a single number, it must be positive."
                )
            size = (size, size)
        elif isinstance(size, (list, tuple, np.ndarray)):
            if len(size) != 2:
                raise ValueError("If input_size is a sequence, it must be of len 2.")
            size = (size[0], size[1])
        else:
            # TypeError is a subclass of Exception, so callers catching the
            # previously raised generic Exception keep working.
            raise TypeError("input_size must in Number or list or tuple or np.ndarray")
        self.size = size
        self.keep_ratio = keep_ratio
        self.random_rate = random_rate

    def _pad_to_size(self, im):
        """Zero-pad ``im`` at the bottom/right so it is at least ``self.size`` (w, h) large."""
        target_w, target_h = self.size
        h, w, c = im.shape
        # size is (w, h): compare height with size[1] and width with size[0].
        max_h = max(h, target_h)
        max_w = max(w, target_w)
        # Keep the image dtype so non-uint8 images are not truncated.
        im_padded = np.zeros((max_h, max_w, c), dtype=im.dtype)
        im_padded[:h, :w] = im
        return im_padded

    def __call__(self, data: dict) -> dict:
        """
        Resize the image to ``self.size`` and scale the text polygons by the
        resulting per-axis factors.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced
        """
        if random.random() > self.random_rate:
            return data
        im = data["img"]
        text_polys = data["text_polys"]

        if self.keep_ratio:
            im = self._pad_to_size(im)
        # astype returns a new float32 array, so the caller's polygons are not mutated.
        text_polys = text_polys.astype(np.float32)
        h, w, _ = im.shape
        im = cv2.resize(im, self.size)
        w_scale = self.size[0] / float(w)
        h_scale = self.size[1] / float(h)
        # polygons are indexed as [polygon, vertex, (x, y)]
        text_polys[:, :, 0] *= w_scale
        text_polys[:, :, 1] *= h_scale

        data["img"] = im
        data["text_polys"] = text_polys
        return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resize_image(img, short_size):
    """
    Resize ``img`` so its shorter side is about ``short_size``, with both
    sides rounded to a multiple of 32.

    :param img: image array with a three-element shape (height, width, channels)
    :param short_size: desired length of the shorter side before rounding
    :return: (resized image, (width ratio, height ratio)) where each ratio is
        new size / original size
    """
    src_h, src_w, _ = img.shape
    # Scale the longer side by the same factor that maps the shorter side to short_size.
    if src_h < src_w:
        dst_h, dst_w = short_size, short_size / src_h * src_w
    else:
        dst_h, dst_w = short_size / src_w * src_h, short_size
    # Round both sides to the nearest multiple of 32.
    dst_h, dst_w = (int(round(side / 32) * 32) for side in (dst_h, dst_w))
    return cv2.resize(img, (dst_w, dst_h)), (dst_w / src_w, dst_h / src_h)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ResizeShortSize:
    """Upscale the image (and optionally the text polygons) so the shorter side is at least ``short_size``."""

    def __init__(self, short_size, resize_text_polys=True):
        """
        :param short_size: minimum length required for the shorter image side
        :param resize_text_polys: whether the text polygons are scaled together with the image
        :return:
        """
        self.short_size = short_size
        self.resize_text_polys = resize_text_polys

    def __call__(self, data: dict) -> dict:
        """
        Resize the image and the text polygons.

        Nothing is changed when the shorter side already reaches ``short_size``.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' written back
        """
        im = data["img"]
        text_polys = data["text_polys"]

        h, w, _ = im.shape
        short_edge = min(h, w)
        if short_edge < self.short_size:
            # make sure the shorter side is >= short_size
            scale = self.short_size / short_edge
            im = cv2.resize(im, dsize=None, fx=scale, fy=scale)
            if self.resize_text_polys:
                # The same factor applies to x and y, so scale every
                # coordinate of every vertex. Indexing [:, 0] / [:, 1] would
                # only touch vertices 0 and 1 of [polygon, vertex, (x, y)]
                # arrays. Out-of-place so the caller's array is not mutated.
                text_polys = np.asarray(text_polys) * scale

        data["img"] = im
        data["text_polys"] = text_polys
        return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HorizontalFlip:
    """Randomly mirror the image left-right and mirror the polygon x coordinates with it."""

    def __init__(self, random_rate):
        """
        :param random_rate: threshold compared against ``random.random()``;
            the sample is returned untouched whenever the draw exceeds it
        """
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Flip the image with ``cv2.flip(img, 1)`` and replace every polygon x
        coordinate by ``width - x``.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced
        """
        if random.random() > self.random_rate:
            return data
        image, polys = data["img"], data["text_polys"]

        # Work on a copy so the caller's polygon array is left alone.
        mirrored_polys = polys.copy()
        mirrored_img = cv2.flip(image, 1)
        _, width, _ = mirrored_img.shape
        # polygons are indexed as [polygon, vertex, (x, y)]
        mirrored_polys[:, :, 0] = width - mirrored_polys[:, :, 0]

        data["img"] = mirrored_img
        data["text_polys"] = mirrored_polys
        return data
|
|
|
|
|
|
|
|
|
|
|
2025-03-28 16:28:38 +08:00
|
|
|
|
class VerticalFlip:
    """Randomly mirror the image top-bottom and mirror the polygon y coordinates with it."""

    def __init__(self, random_rate):
        """
        :param random_rate: threshold compared against ``random.random()``;
            the sample is returned untouched whenever the draw exceeds it
        """
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Flip the image with ``cv2.flip(img, 0)`` and replace every polygon y
        coordinate by ``height - y``.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced
        """
        if random.random() > self.random_rate:
            return data
        image, polys = data["img"], data["text_polys"]

        # Work on a copy so the caller's polygon array is left alone.
        mirrored_polys = polys.copy()
        mirrored_img = cv2.flip(image, 0)
        height, _, _ = mirrored_img.shape
        # polygons are indexed as [polygon, vertex, (x, y)]
        mirrored_polys[:, :, 1] = height - mirrored_polys[:, :, 1]

        data["img"] = mirrored_img
        data["text_polys"] = mirrored_polys
        return data
|