# encoding: utf-8 """ @author: liaoxingyu @contact: sherlockliao01@gmail.com """ import numpy as np import torch from PIL import Image, ImageOps, ImageEnhance def to_tensor(pic): """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. See ``ToTensor`` for more details. Args: pic (PIL Image or numpy.ndarray): Image to be converted to tensor. Returns: Tensor: Converted image. """ if isinstance(pic, np.ndarray): assert len(pic.shape) in (2, 3) # handle numpy array if pic.ndim == 2: pic = pic[:, :, None] img = torch.from_numpy(pic.transpose((2, 0, 1))) # backward compatibility if isinstance(img, torch.ByteTensor): return img.float() else: return img # handle PIL Image if pic.mode == 'I': img = torch.from_numpy(np.array(pic, np.int32, copy=False)) elif pic.mode == 'I;16': img = torch.from_numpy(np.array(pic, np.int16, copy=False)) elif pic.mode == 'F': img = torch.from_numpy(np.array(pic, np.float32, copy=False)) elif pic.mode == '1': img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False)) else: img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) # PIL image mode: L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK if pic.mode == 'YCbCr': nchannel = 3 elif pic.mode == 'I;16': nchannel = 1 else: nchannel = len(pic.mode) img = img.view(pic.size[1], pic.size[0], nchannel) # put it from HWC to CHW format # yikes, this transpose takes 80% of the loading time/CPU img = img.transpose(0, 1).transpose(0, 2).contiguous() if isinstance(img, torch.ByteTensor): return img.float() else: return img def int_parameter(level, maxval): """Helper function to scale `val` between 0 and maxval . Args: level: Level of the operation that will be between [0, `PARAMETER_MAX`]. maxval: Maximum value that the operation can have. This will be scaled to level/PARAMETER_MAX. Returns: An int that results from scaling `maxval` according to `level`. """ return int(level * maxval / 10) def float_parameter(level, maxval): """Helper function to scale `val` between 0 and maxval. Args: level: Level of the operation that will be between [0, `PARAMETER_MAX`]. maxval: Maximum value that the operation can have. This will be scaled to level/PARAMETER_MAX. Returns: A float that results from scaling `maxval` according to `level`. """ return float(level) * maxval / 10. def sample_level(n): return np.random.uniform(low=0.1, high=n) def autocontrast(pil_img, *args): return ImageOps.autocontrast(pil_img) def equalize(pil_img, *args): return ImageOps.equalize(pil_img) def posterize(pil_img, level, *args): level = int_parameter(sample_level(level), 4) return ImageOps.posterize(pil_img, 4 - level) def rotate(pil_img, level, *args): degrees = int_parameter(sample_level(level), 30) if np.random.uniform() > 0.5: degrees = -degrees return pil_img.rotate(degrees, resample=Image.BILINEAR) def solarize(pil_img, level, *args): level = int_parameter(sample_level(level), 256) return ImageOps.solarize(pil_img, 256 - level) def shear_x(pil_img, level): level = float_parameter(sample_level(level), 0.3) if np.random.uniform() > 0.5: level = -level return pil_img.transform(pil_img.size, Image.AFFINE, (1, level, 0, 0, 1, 0), resample=Image.BILINEAR) def shear_y(pil_img, level): level = float_parameter(sample_level(level), 0.3) if np.random.uniform() > 0.5: level = -level return pil_img.transform(pil_img.size, Image.AFFINE, (1, 0, 0, level, 1, 0), resample=Image.BILINEAR) def translate_x(pil_img, level): level = int_parameter(sample_level(level), pil_img.size[0] / 3) if np.random.random() > 0.5: level = -level return pil_img.transform(pil_img.size, Image.AFFINE, (1, 0, level, 0, 1, 0), resample=Image.BILINEAR) def translate_y(pil_img, level): level = int_parameter(sample_level(level), pil_img.size[1] / 3) if np.random.random() > 0.5: level = -level return pil_img.transform(pil_img.size, Image.AFFINE, (1, 0, 0, 0, 1, level), resample=Image.BILINEAR) # operation that overlaps with ImageNet-C's test set def color(pil_img, level, *args): level = float_parameter(sample_level(level), 1.8) + 0.1 return ImageEnhance.Color(pil_img).enhance(level) # operation that overlaps with ImageNet-C's test set def contrast(pil_img, level, *args): level = float_parameter(sample_level(level), 1.8) + 0.1 return ImageEnhance.Contrast(pil_img).enhance(level) # operation that overlaps with ImageNet-C's test set def brightness(pil_img, level, *args): level = float_parameter(sample_level(level), 1.8) + 0.1 return ImageEnhance.Brightness(pil_img).enhance(level) # operation that overlaps with ImageNet-C's test set def sharpness(pil_img, level, *args): level = float_parameter(sample_level(level), 1.8) + 0.1 return ImageEnhance.Sharpness(pil_img).enhance(level) augmentations = [ autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y, translate_x, translate_y ]