# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import random
import numpy as np
import math
import torch
import torchvision
from torchvision.transforms import functional as F

from maskrcnn_benchmark.structures.bounding_box import BoxList


def matrix_iou(a, b, relative=False):
    """
    Return the pairwise IoU of boxes a and b; numpy version for data augmentation.
    If relative=True, the intersection is normalized by the area of b only.
    """
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    if relative:
        ious = area_i / (area_b[:, np.newaxis] + 1e-12)
    else:
        ious = area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
    return ious
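
# A minimal usage sketch for matrix_iou (illustrative values, not from the source):
#   a = np.array([[0., 0., 10., 10.]])
#   b = np.array([[0., 0., 10., 10.], [5., 5., 15., 15.]])
#   matrix_iou(a, b)                 # pairwise IoU, shape (1, 2): array([[1., 0.14285714]])
#   matrix_iou(a, b, relative=True)  # intersection / area(b):    array([[1., 0.25]])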


class RACompose(object):
    def __init__(self, pre_transforms, rand_transforms, post_transforms, concurrent=2):
        self.preprocess = pre_transforms
        self.transforms = post_transforms
        self.rand_transforms = rand_transforms
        self.concurrent = concurrent

    def __call__(self, image, target):
        for t in self.preprocess:
            image, target = t(image, target)
        # the randomly chosen transforms operate on numpy arrays
        for t in random.choices(self.rand_transforms, k=self.concurrent):
            image = np.array(image)
            image, target = t(image, target)
        for t in self.transforms:
            image, target = t(image, target)

        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.preprocess:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\nRandomly select {0} from: (".format(self.concurrent)
        for t in self.rand_transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += ")\nThen, apply:"
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string
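
# A hedged sketch of wiring RACompose together (the transform choices and
# normalization values are assumptions for illustration, not prescribed here):
#   aug = RACompose(
#       pre_transforms=[Resize(800, 1333)],
#       rand_transforms=[RandomCrop(), RandomAffine(), RandomErasing()],
#       post_transforms=[ToTensor(), Normalize(mean=[0.485, 0.456, 0.406],
#                                              std=[0.229, 0.224, 0.225])],
#       concurrent=2,
#   )
#   image, target = aug(image, target)  # target is a BoxList with a 'labels' field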


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target=None):
        for t in self.transforms:
            image, target = t(image, target)
        if target is None:
            return image
        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string
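
# Compose threads (image, target) through every transform; passing target=None
# returns just the image. A minimal sketch (mean/std values are assumed):
#   tfm = Compose([Resize(800, 1333), ToTensor(),
#                  Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
#   img_tensor = tfm(pil_image)  # target defaults to None, so only the image is returned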


class Resize(object):
    def __init__(self, min_size, max_size, restrict=False):
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size
        self.max_size = max_size
        self.restrict = restrict

    # modified from torchvision to add support for max size
    def get_size(self, image_size):
        w, h = image_size
        size = random.choice(self.min_size)
        max_size = self.max_size
        if self.restrict:
            return (size, max_size)
        if max_size is not None:
            # shrink the target size so the longer side does not exceed max_size
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(round(max_size * min_original_size / max_original_size))

        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)

        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)

        return (oh, ow)

    def __call__(self, image, target):
        if isinstance(image, np.ndarray):
            # image.shape[:2] is (h, w), so get_size sees swapped axes and its
            # (oh, ow) return value comes back as (w, h) -- exactly the
            # (width, height) dsize that cv2.resize expects; the two swaps cancel.
            image_size = self.get_size(image.shape[:2])
            image = cv2.resize(image, image_size)
            new_size = image_size
        else:
            image = F.resize(image, self.get_size(image.size))
            new_size = image.size
        if target is not None:
            target = target.resize(new_size)
        return image, target
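
# A worked example of get_size (numbers are illustrative):
#   Resize(min_size=800, max_size=1333).get_size((1200, 500))
#   -> 1200 / 500 * 800 = 1920 > 1333, so size = round(1333 * 500 / 1200) = 555
#   -> returns (oh, ow) = (555, 1332): the short side becomes 555 and the long
#      side is capped near max_size.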


class RandomHorizontalFlip(object):
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                # copy: fliplr returns a negatively-strided view, which
                # torch.from_numpy cannot consume later in the pipeline
                image = np.fliplr(image).copy()
            else:
                image = F.hflip(image)
            if target is not None:
                target = target.transpose(0)
        return image, target


class RandomVerticalFlip(object):
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                image = np.flipud(image).copy()  # copy for the same reason as fliplr above
            else:
                image = F.vflip(image)
            if target is not None:  # guard added for parity with RandomHorizontalFlip
                target = target.transpose(1)
        return image, target
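
# In maskrcnn_benchmark's BoxList, transpose(0) corresponds to FLIP_LEFT_RIGHT
# and transpose(1) to FLIP_TOP_BOTTOM, which is why the two flip classes pass
# different method codes to target.transpose().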


class ToTensor(object):
    def __call__(self, image, target):
        return F.to_tensor(image), target


class Normalize(object):
    def __init__(self, mean, std, format='rgb'):
        self.mean = mean
        self.std = std
        self.format = format.lower()

    def __call__(self, image, target):
        if 'bgr' in self.format:
            # channels-first tensor from ToTensor: reorder RGB -> BGR
            image = image[[2, 1, 0]]
        if '255' in self.format:
            image = image * 255
        image = F.normalize(image, mean=self.mean, std=self.std)
        return image, target
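
# A hedged sketch of the Normalize format convention (the mean/std values are
# the common ImageNet/Caffe ones, assumed for illustration):
#   Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], format='rgb')
#   Normalize(mean=[102.98, 115.95, 122.77], std=[1., 1., 1.], format='bgr255')
# 'bgr' in the format string flips the channel order, and '255' rescales the
# [0, 1] tensor produced by ToTensor back to [0, 255] before normalization.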


class ColorJitter(object):
    def __init__(self,
                 brightness=0.0,
                 contrast=0.0,
                 saturation=0.0,
                 hue=0.0,
                 ):
        self.color_jitter = torchvision.transforms.ColorJitter(
            brightness=brightness,
            contrast=contrast,
            saturation=saturation,
            hue=hue,
        )

    def __call__(self, image, target):
        image = self.color_jitter(image)
        return image, target
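
# Note (assumption): this wrapper jitters the image only and leaves the target
# untouched; older torchvision releases expect a PIL Image here, so it is
# typically placed before any conversion to np.ndarray or tensor.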


class RandomCrop(object):
    def __init__(self, prob=0.5, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        # sample mode 1: return the original image; mode 0: crop with no IoU constraint
        self.prob = prob
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, img, target):
        if random.random() > self.prob:
            return img, target

        h, w, c = img.shape
        boxes = target.bbox.numpy()
        labels = target.get_field('labels')

        while True:
            mode = random.choice(self.sample_mode)
            if mode == 1:
                return img, target

            min_iou = mode

            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)

            # the crop aspect ratio h / w must stay in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)

            patch = np.array([left, top, left + new_w, top + new_h])
            overlaps = matrix_iou(patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            if overlaps.min() < min_iou:
                continue

            # the centers of the kept boxes must lie inside the cropped image
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = (center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) * \
                   (center[:, 0] < patch[2]) * (center[:, 1] < patch[3])
            if not mask.any():
                continue

            boxes = boxes[mask]
            labels = labels[mask]

            # crop the image, then clip and shift the boxes into the patch frame
            img = img[int(patch[1]):int(patch[3]), int(patch[0]):int(patch[2])]

            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            boxes -= np.tile(patch[:2], 2)

            new_target = BoxList(boxes, (img.shape[1], img.shape[0]), mode='xyxy')
            new_target.add_field('labels', labels)
            return img, new_target
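
# With the defaults, sample_mode is (1, 0.1, 0.3, 0.5, 0.7, 0.9, 0): drawing 1
# keeps the image unchanged, 0 crops with no IoU constraint, and the other
# values require every ground-truth box to overlap the sampled patch by at
# least that IoU before the crop is accepted.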


class RandomAffine(object):
    def __init__(self, prob=0.5, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5)):
        self.prob = prob
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue

    def __call__(self, img, targets=None):
        if random.random() > self.prob:
            return img, targets
        # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

        border = 0  # width of added border (optional)
        height, width, _ = img.shape
        bbox = targets.bbox

        # Rotation and Scale
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]) + self.degrees[0]
        # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

        # Translation (note: as in the original, the x range scales with
        # img.shape[0] (height) and the y range with img.shape[1] (width))
        T = np.eye(3)
        T[0, 2] = (random.random() * 2 - 1) * self.translate[0] * img.shape[0] + border  # x translation (pixels)
        T[1, 2] = (random.random() * 2 - 1) * self.translate[1] * img.shape[1] + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180)  # y shear (deg)

        M = S @ T @ R  # combined transform; the multiplication order matters
        imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
                                  borderValue=self.borderValue)  # BGR-order borderValue

        # Warp the boxes along with the image
        if targets:  # skips None and empty BoxList (BoxList defines __len__)
            n = bbox.shape[0]
            points = bbox[:, 0:4]
            area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])  # computed but unused in this version

            # warp the four corners of each box
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # take the axis-aligned hull of the warped corners as the new box
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction to compensate for rotation-induced box growth
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # clip warped boxes to the image bounds
            x1 = np.clip(xy[:, 0], 0, width)
            y1 = np.clip(xy[:, 1], 0, height)
            x2 = np.clip(xy[:, 2], 0, width)
            y2 = np.clip(xy[:, 3], 0, height)
            new_bbox = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
            targets.bbox = torch.as_tensor(new_bbox, dtype=torch.float32)

        return imw, targets
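
# The combined warp applies, for a homogeneous pixel coordinate p = (x, y, 1)^T:
#   p' = S @ T @ R @ p
# i.e. rotation/scale first, then translation, then shear. The box corners are
# mapped with xy @ M.T because they are stored as row vectors.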


class RandomErasing:
    def __init__(self, prob=0.5, era_l=0.02, era_h=1/3, min_aspect=0.3,
                 mode='const', max_count=1, max_overlap=0.3, max_value=255):
        self.prob = prob
        self.era_l = era_l
        self.era_h = era_h
        self.min_aspect = min_aspect
        self.min_count = 1
        self.max_count = max_count
        self.max_overlap = max_overlap
        self.max_value = max_value
        self.mode = mode.lower()
        assert self.mode in ['const', 'rand', 'pixel'], 'invalid erase mode: %s' % self.mode

    def _get_pixels(self, patch_size):
        if self.mode == 'pixel':
            # independent random value for every erased pixel
            return np.random.random(patch_size) * self.max_value
        elif self.mode == 'rand':
            # one random color per erased patch
            return np.random.random((1, 1, patch_size[-1])) * self.max_value
        else:
            # constant (zero) fill
            return np.zeros((1, 1, patch_size[-1]))

    def __call__(self, image, target):
        if random.random() > self.prob:
            return image, target
        ih, iw, ic = image.shape
        ia = ih * iw
        count = self.min_count if self.min_count == self.max_count else \
            random.randint(self.min_count, self.max_count)
        erase_boxes = []
        for _ in range(count):
            for try_idx in range(10):
                erase_area = random.uniform(self.era_l, self.era_h) * ia / count
                aspect_ratio = math.exp(random.uniform(math.log(self.min_aspect), math.log(1 / self.min_aspect)))
                eh = int(round(math.sqrt(erase_area * aspect_ratio)))
                ew = int(round(math.sqrt(erase_area / aspect_ratio)))
                if eh < ih and ew < iw:
                    x = random.randint(0, iw - ew)
                    y = random.randint(0, ih - eh)
                    image[y:y + eh, x:x + ew, :] = self._get_pixels((eh, ew, ic))
                    erase_boxes.append([x, y, x + ew, y + eh])
                    break

        # drop boxes that the erased patches cover too heavily
        if target is not None and len(erase_boxes) > 0:
            boxes = target.bbox.numpy()
            labels = target.get_field('labels')
            overlap = matrix_iou(np.array(erase_boxes), boxes, relative=True)
            mask = overlap.max(axis=0) < self.max_overlap
            boxes = boxes[mask]
            labels = labels[mask]
            target.bbox = torch.as_tensor(boxes, dtype=torch.float32)
            target.add_field('labels', labels)

        return image, target
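
# A minimal sketch of the erase-size sampling (numbers are illustrative): for a
# 480x640 image with the defaults, each patch erases between era_l=2% and
# era_h=~33% of the 307200-pixel area (divided by count), with an aspect ratio
# drawn log-uniformly from [min_aspect, 1/min_aspect] = [0.3, ~3.33].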