from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from PIL import Image
import random
import numpy as np
import math

import torch
from torchvision.transforms import *


class Random2DTranslation(object):
    """
    With probability p, first enlarge the image to (1 + 1/8) times the target size
    and then perform a random crop of (height, width); otherwise, simply resize
    the image to (height, width).

    Args:
    - height (int): target image height.
    - width (int): target image width.
    - p (float): probability of performing this transformation. Default: 0.5.
    - interpolation: PIL interpolation mode used for resizing. Default: Image.BILINEAR.
    """
    def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR):
        self.height = height
        self.width = width
        self.p = p
        self.interpolation = interpolation
    def __call__(self, img):
        """
        Args:
        - img (PIL Image): Image to be cropped.
        """
        if random.uniform(0, 1) > self.p:
            # skip the translation and simply resize to the target size
            return img.resize((self.width, self.height), self.interpolation)

        # enlarge by a factor of 1.125, then randomly crop back to (height, width)
        new_width, new_height = int(round(self.width * 1.125)), int(round(self.height * 1.125))
        resized_img = img.resize((new_width, new_height), self.interpolation)
        x_maxrange = new_width - self.width
        y_maxrange = new_height - self.height
        x1 = int(round(random.uniform(0, x_maxrange)))
        y1 = int(round(random.uniform(0, y_maxrange)))
        cropped_img = resized_img.crop((x1, y1, x1 + self.width, y1 + self.height))
        return cropped_img
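
# Usage sketch (illustrative only; the input size below is an arbitrary example):
#     >>> t = Random2DTranslation(height=256, width=128)
#     >>> out = t(Image.new('RGB', (64, 128)))
#     >>> out.size   # PIL reports (width, height)
#     (128, 256)
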
class RandomErasing(object):
    '''
    Performs Random Erasing, the data augmentation proposed by Zhong et al. in
    "Random Erasing Data Augmentation".
    -------------------------------------------------------------------------------------
    probability: probability that the operation is performed.
    sl: minimum proportion of the image area that may be erased.
    sh: maximum proportion of the image area that may be erased.
    r1: minimum aspect ratio of the erased region.
    mean: per-channel values used to fill the erased region.
    -------------------------------------------------------------------------------------
    Origin: https://github.com/zhunzhong07/Random-Erasing
    '''
    def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]):
        self.probability = probability
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1
    def __call__(self, img):
        # img is expected to be a (C, H, W) tensor, i.e. RandomErasing is applied after ToTensor()
        if random.uniform(0, 1) > self.probability:
            return img

        # try up to 100 times to sample an erasing rectangle that fits inside the image
        for attempt in range(100):
            area = img.size()[1] * img.size()[2]

            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1/self.r1)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < img.size()[2] and h < img.size()[1]:
                # pick a random top-left corner and fill the region with the mean value
                x1 = random.randint(0, img.size()[1] - h)
                y1 = random.randint(0, img.size()[2] - w)
                if img.size()[0] == 3:
                    img[0, x1:x1+h, y1:y1+w] = self.mean[0]
                    img[1, x1:x1+h, y1:y1+w] = self.mean[1]
                    img[2, x1:x1+h, y1:y1+w] = self.mean[2]
                else:
                    img[0, x1:x1+h, y1:y1+w] = self.mean[0]
                return img

        return img
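
# Usage sketch (illustrative only): RandomErasing operates on a (C, H, W) tensor,
# so it is applied after ToTensor(); probability=1.0 forces the erasing here:
#     >>> x = ToTensor()(Image.new('RGB', (64, 128)))   # shape (3, 128, 64)
#     >>> x = RandomErasing(probability=1.0)(x)          # same shape, one region filled with `mean`
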
class ColorAugmentation(object):
    """
    Randomly alters the intensities of the RGB channels (PCA-based color jittering
    as in the reference below).

    Reference:
    Krizhevsky et al. ImageNet Classification with Deep Convolutional Neural Networks. NIPS 2012.
    """
    def __init__(self, p=0.5):
        self.p = p
        # eigenvectors and eigenvalues of the RGB pixel covariance used for the color shift
        self.eig_vec = torch.Tensor([
            [0.4009, 0.7192, -0.5675],
            [-0.8140, -0.0045, -0.5808],
            [0.4203, -0.6948, -0.5836],
        ])
        self.eig_val = torch.Tensor([[0.2175, 0.0188, 0.0045]])
    def _check_input(self, tensor):
        assert tensor.dim() == 3 and tensor.size(0) == 3

    def __call__(self, tensor):
        self._check_input(tensor)
        if random.uniform(0, 1) > self.p:
            return tensor
        # sample a random scaling of the eigenvalues and shift each channel along the eigenvectors
        alpha = torch.normal(mean=torch.zeros_like(self.eig_val)) * 0.1
        quantity = torch.mm(self.eig_val * alpha, self.eig_vec)
        tensor = tensor + quantity.view(3, 1, 1)
        return tensor
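
# Usage sketch (illustrative only): ColorAugmentation expects a 3xHxW float tensor,
# so it is placed after ToTensor(), e.g.
#     >>> x = ToTensor()(Image.new('RGB', (64, 128)))
#     >>> x = ColorAugmentation(p=1.0)(x)   # per-channel intensity shift, same shape
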
def build_transforms(height,
                     width,
                     random_erase=False, # use random erasing for data augmentation
                     color_jitter=False, # randomly change the brightness, contrast and saturation
                     color_aug=False, # randomly alter the intensities of RGB channels
                     **kwargs):
    # use imagenet mean and std as default
    # TODO: compute dataset-specific mean and std
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=imagenet_mean, std=imagenet_std)

    # build train transformations
    transform_train = []
    transform_train += [Random2DTranslation(height, width)]
    transform_train += [RandomHorizontalFlip()]
    if color_jitter:
        transform_train += [ColorJitter(brightness=0.2, contrast=0.15, saturation=0, hue=0)]
    transform_train += [ToTensor()]
    if color_aug:
        transform_train += [ColorAugmentation()]
    transform_train += [normalize]
    if random_erase:
        transform_train += [RandomErasing()]
    transform_train = Compose(transform_train)

    # build test transformations
    transform_test = Compose([
        Resize((height, width)),
        ToTensor(),
        normalize,
    ])

    return transform_train, transform_test
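

if __name__ == '__main__':
    # Minimal usage sketch (illustrative only, not part of the training code).
    # The 256x128 target size and the dummy input are assumptions for demonstration.
    transform_train, transform_test = build_transforms(
        height=256, width=128,
        random_erase=True, color_jitter=True, color_aug=True,
    )
    dummy = Image.new('RGB', (64, 128))          # PIL size is given as (width, height)
    print(transform_train(dummy).shape)          # expected: torch.Size([3, 256, 128])
    print(transform_test(dummy).shape)           # expected: torch.Size([3, 256, 128])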