from __future__ import absolute_import
from __future__ import division

import random

import numpy as np
import torch
from PIL import Image
from torchvision.transforms import *


class Random2DTranslation(object):
    """With probability p, first enlarge the image to (1 + 1/8) of the target
    size, then perform a random crop back to (height, width); otherwise simply
    resize to (height, width).

    Args:
    - height (int): target image height.
    - width (int): target image width.
    - p (float): probability of performing this transformation. Default: 0.5.
    """

    def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR):
        self.height = height
        self.width = width
        self.p = p
        self.interpolation = interpolation

    def __call__(self, img):
        """
        Args:
        - img (PIL Image): Image to be cropped.
        """
        if random.uniform(0, 1) > self.p:
            # Skip the translation: plain resize to the target size.
            return img.resize((self.width, self.height), self.interpolation)

        # Enlarge by a factor of 1.125, then randomly crop back to the target size.
        new_width, new_height = int(round(self.width * 1.125)), int(round(self.height * 1.125))
        resized_img = img.resize((new_width, new_height), self.interpolation)
        x_maxrange = new_width - self.width
        y_maxrange = new_height - self.height
        x1 = int(round(random.uniform(0, x_maxrange)))
        y1 = int(round(random.uniform(0, y_maxrange)))
        cropped_img = resized_img.crop((x1, y1, x1 + self.width, y1 + self.height))
        return cropped_img


def build_transforms(height, width, is_train, **kwargs):
    """Build transforms.

    Args:
    - height (int): target image height.
    - width (int): target image width.
    - is_train (bool): train or test phase.
    """
    # Use ImageNet mean and std as defaults.
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=imagenet_mean, std=imagenet_std)

    transforms = []
    if is_train:
        transforms += [Random2DTranslation(height, width)]
        transforms += [RandomHorizontalFlip()]
    else:
        transforms += [Resize((height, width))]
    transforms += [ToTensor()]
    transforms += [normalize]
    transforms = Compose(transforms)

    return transforms
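

# Minimal usage sketch (illustrative, not part of the original module): build
# the training pipeline and apply it to a dummy PIL image to check the output
# tensor shape. The 256x128 target size and the dummy image are assumed
# example values only.
if __name__ == '__main__':
    example_transform = build_transforms(height=256, width=128, is_train=True)
    dummy_img = Image.new('RGB', (64, 128))  # placeholder image, size given as (W, H)
    out = example_transform(dummy_img)
    # Expected: torch.Size([3, 256, 128]) -- (C, H, W) after ToTensor.
    print(out.shape)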