import math
import warnings
import random
import numbers
import numpy as np
from PIL import Image, ImageFilter
from collections.abc import Sequence
import torch
import torchvision.transforms.functional as TF

_pil_interpolation_to_str = {
    Image.NEAREST: 'PIL.Image.NEAREST',
    Image.BILINEAR: 'PIL.Image.BILINEAR',
    Image.BICUBIC: 'PIL.Image.BICUBIC',
    Image.LANCZOS: 'PIL.Image.LANCZOS',
    Image.HAMMING: 'PIL.Image.HAMMING',
    Image.BOX: 'PIL.Image.BOX',
}


def _get_image_size(img):
    if TF._is_pil_image(img):
        return img.size
    elif isinstance(img, torch.Tensor) and img.dim() > 2:
        # Tensors are [..., H, W]; reverse to (w, h) to match PIL's Image.size.
        return img.shape[-2:][::-1]
    else:
        raise TypeError("Unexpected type {}".format(type(img)))


class RandomHorizontalFlip(object):
    """Horizontally flip the given PIL Image and its mask randomly with a given probability.

    Args:
        p (float): probability of the image being flipped. Default value is 0.5.
    """
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, mask):
        """
        Args:
            img (PIL Image): Image to be flipped.
            mask (PIL Image): Mask to be flipped with the same outcome.

        Returns:
            (PIL Image, PIL Image): Randomly flipped image and mask.
        """
        if random.random() < self.p:
            img = TF.hflip(img)
            mask = TF.hflip(mask)
        return img, mask

    def __repr__(self):
        return self.__class__.__name__ + '(p={})'.format(self.p)


class RandomVerticalFlip(object):
    """Vertically flip the given PIL Image and its mask randomly with a given probability.

    Args:
        p (float): probability of the image being flipped. Default value is 0.5.
    """
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, mask):
        """
        Args:
            img (PIL Image): Image to be flipped.
            mask (PIL Image): Mask to be flipped with the same outcome.

        Returns:
            (PIL Image, PIL Image): Randomly flipped image and mask.
        """
        if random.random() < self.p:
            img = TF.vflip(img)
            mask = TF.vflip(mask)
        return img, mask

    def __repr__(self):
        return self.__class__.__name__ + '(p={})'.format(self.p)
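

# Usage sketch (an illustrative addition, not from the original file): both flip
# transforms draw one random outcome and apply it to the image and mask alike,
# so the pair stays pixel-aligned.
#
#   from PIL import Image
#   img, seg = Image.new('RGB', (64, 64)), Image.new('L', (64, 64))
#   img, seg = RandomHorizontalFlip(p=0.5)(img, seg)
#   img, seg = RandomVerticalFlip(p=0.5)(img, seg)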


class GaussianBlur(object):
    """Gaussian blur augmentation from SimCLR: https://arxiv.org/abs/2002.05709"""
    def __init__(self, sigma=[.1, 2.]):
        self.sigma = sigma

    def __call__(self, x):
        sigma = random.uniform(self.sigma[0], self.sigma[1])
        x = x.filter(ImageFilter.GaussianBlur(radius=sigma))
        return x
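

# Usage sketch (an illustrative addition): the blur radius is drawn uniformly
# from [sigma[0], sigma[1]] on every call, so the blur strength varies per sample.
#
#   from PIL import Image
#   blurred = GaussianBlur(sigma=[.1, 2.])(Image.new('RGB', (64, 64)))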


class RandomAffine(object):
    """Random affine transformation of the image keeping center invariant.

    Args:
        degrees (sequence or float or int): Range of degrees to select from.
            If degrees is a number instead of a sequence like (min, max), the range of degrees
            will be (-degrees, +degrees). Set to 0 to deactivate rotations.
        translate (tuple, optional): tuple of maximum absolute fraction for horizontal
            and vertical translations. For example translate=(a, b), then horizontal shift
            is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is
            randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default.
        scale (tuple, optional): scaling factor interval, e.g. (a, b), then scale is
            randomly sampled from the range a <= scale <= b. Will keep original scale by default.
        shear (sequence or float or int, optional): Range of degrees to select from.
            If shear is a number, a shear parallel to the x axis in the range (-shear, +shear)
            will be applied. Else if shear is a tuple or list of 2 values, a shear parallel to the x axis in the
            range (shear[0], shear[1]) will be applied. Else if shear is a tuple or list of 4 values,
            an x-axis shear in (shear[0], shear[1]) and a y-axis shear in (shear[2], shear[3]) will be applied.
            Will not apply shear by default.
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter. See `filters`_ for more information.
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
        fillcolor (tuple or int): Optional fill color (tuple for RGB images and int for grayscale) for the area
            outside the transform in the output image (Pillow>=5.0.0).

    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
    """

    def __init__(self,
                 degrees,
                 translate=None,
                 scale=None,
                 shear=None,
                 resample=False,
                 fillcolor=0):
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError(
                    "If degrees is a single number, it must be positive.")
            self.degrees = (-degrees, degrees)
        else:
            assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \
                "degrees should be a list or tuple and it must be of length 2."
            self.degrees = degrees

        if translate is not None:
            assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
                "translate should be a list or tuple and it must be of length 2."
            for t in translate:
                if not (0.0 <= t <= 1.0):
                    raise ValueError(
                        "translation values should be between 0 and 1")
        self.translate = translate

        if scale is not None:
            assert isinstance(scale, (tuple, list)) and len(scale) == 2, \
                "scale should be a list or tuple and it must be of length 2."
            for s in scale:
                if s <= 0:
                    raise ValueError("scale values should be positive")
        self.scale = scale

        if shear is not None:
            if isinstance(shear, numbers.Number):
                if shear < 0:
                    raise ValueError(
                        "If shear is a single number, it must be positive.")
                self.shear = (-shear, shear)
            else:
                assert isinstance(shear, (tuple, list)) and \
                    (len(shear) == 2 or len(shear) == 4), \
                    "shear should be a list or tuple and it must be of length 2 or 4."
                # X-axis shear with [min, max]
                if len(shear) == 2:
                    self.shear = [shear[0], shear[1], 0., 0.]
                elif len(shear) == 4:
                    self.shear = [s for s in shear]
        else:
            self.shear = shear
        self.resample = resample
        self.fillcolor = fillcolor

    @staticmethod
    def get_params(degrees, translate, scale_ranges, shears, img_size):
        """Get parameters for the affine transformation.

        Returns:
            sequence: params to be passed to the affine transformation.
        """
        angle = random.uniform(degrees[0], degrees[1])
        if translate is not None:
            max_dx = translate[0] * img_size[0]
            max_dy = translate[1] * img_size[1]
            translations = (np.round(random.uniform(-max_dx, max_dx)),
                            np.round(random.uniform(-max_dy, max_dy)))
        else:
            translations = (0, 0)

        if scale_ranges is not None:
            scale = random.uniform(scale_ranges[0], scale_ranges[1])
        else:
            scale = 1.0

        if shears is not None:
            if len(shears) == 2:
                shear = [random.uniform(shears[0], shears[1]), 0.]
            elif len(shears) == 4:
                shear = [
                    random.uniform(shears[0], shears[1]),
                    random.uniform(shears[2], shears[3])
                ]
        else:
            shear = 0.0

        return angle, translations, scale, shear

    def __call__(self, img, mask):
        """
        Args:
            img (PIL Image): Image to be transformed.
            mask (PIL Image): Mask to be transformed with the same parameters.

        Returns:
            (PIL Image, PIL Image): Affine transformed image and mask.
        """
        ret = self.get_params(self.degrees, self.translate, self.scale,
                              self.shear, img.size)
        img = TF.affine(img,
                        *ret,
                        resample=self.resample,
                        fillcolor=self.fillcolor)
        # The mask is warped with nearest-neighbour resampling so label ids stay discrete.
        mask = TF.affine(mask, *ret, resample=Image.NEAREST, fillcolor=0)
        return img, mask

    def __repr__(self):
        s = '{name}(degrees={degrees}'
        if self.translate is not None:
            s += ', translate={translate}'
        if self.scale is not None:
            s += ', scale={scale}'
        if self.shear is not None:
            s += ', shear={shear}'
        if self.resample > 0:
            s += ', resample={resample}'
        if self.fillcolor != 0:
            s += ', fillcolor={fillcolor}'
        s += ')'
        d = dict(self.__dict__)
        d['resample'] = _pil_interpolation_to_str[d['resample']]
        return s.format(name=self.__class__.__name__, **d)
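

# Usage sketch (an illustrative addition): a single set of affine parameters is
# sampled per call and applied to both inputs; only the resampling filter and
# fill differ between image and mask.
#
#   from PIL import Image
#   aff = RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1))
#   img, seg = aff(Image.new('RGB', (64, 64)), Image.new('L', (64, 64)))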


class RandomCrop(object):
    """Crop the given PIL Image at a random location.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of a sequence like (h, w), a square crop (size, size) is
            made.
        padding (int or sequence, optional): Optional padding on each border
            of the image. Default is None, i.e. no padding. If a sequence of length
            4 is provided, it is used to pad left, top, right, bottom borders
            respectively. If a sequence of length 2 is provided, it is used to
            pad left/right, top/bottom borders, respectively.
        pad_if_needed (boolean): It will pad the image if smaller than the
            desired size to avoid raising an exception. Since cropping is done
            after padding, the padding seems to be done at a random offset.
        fill: Pixel fill value for constant fill. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant.
        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.

            - constant: pads with a constant value, this value is specified with fill
            - edge: pads with the last value on the edge of the image
            - reflect: pads with reflection of image (without repeating the last value on the edge)
              padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
              will result in [3, 2, 1, 2, 3, 4, 3, 2]
            - symmetric: pads with reflection of image (repeating the last value on the edge)
              padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
              will result in [2, 1, 1, 2, 3, 4, 4, 3]
    """

    def __init__(self,
                 size,
                 padding=None,
                 pad_if_needed=False,
                 fill=0,
                 padding_mode='constant'):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size
        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    @staticmethod
    def get_params(img, output_size):
        """Get parameters for ``crop`` for a random crop.

        Args:
            img (PIL Image): Image to be cropped.
            output_size (tuple): Expected output size of the crop.

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random crop.
        """
        w, h = _get_image_size(img)
        th, tw = output_size
        if w == tw and h == th:
            return 0, 0, h, w

        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def __call__(self, img, mask):
        """
        Args:
            img (PIL Image): Image to be cropped.
            mask (PIL Image): Mask to be cropped at the same location.

        Returns:
            (PIL Image, PIL Image): Cropped image and mask.
        """
        # if self.padding is not None:
        #     img = TF.pad(img, self.padding, self.fill, self.padding_mode)
        #
        # # pad the width if needed
        # if self.pad_if_needed and img.size[0] < self.size[1]:
        #     img = TF.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
        # # pad the height if needed
        # if self.pad_if_needed and img.size[1] < self.size[0]:
        #     img = TF.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)

        i, j, h, w = self.get_params(img, self.size)
        img = TF.crop(img, i, j, h, w)
        mask = TF.crop(mask, i, j, h, w)
        return img, mask

    def __repr__(self):
        return self.__class__.__name__ + '(size={0}, padding={1})'.format(
            self.size, self.padding)
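

# Usage sketch (an illustrative addition): note that the padding logic above is
# currently commented out, so inputs must already be at least `size` in both
# dimensions.
#
#   from PIL import Image
#   crop = RandomCrop(size=32)
#   img, seg = crop(Image.new('RGB', (64, 64)), Image.new('L', (64, 64)))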


class RandomResizedCrop(object):
    """Crop the given PIL Image to a random size and aspect ratio.

    A crop of random size (default: of 0.08 to 1.0) of the original size and a random
    aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
    is finally resized to the given size.
    This is popularly used to train the Inception networks.

    Args:
        size: expected output size of each edge
        scale: range of size of the origin size cropped
        ratio: range of aspect ratio of the origin aspect ratio cropped
        interpolation: Default: PIL.Image.BILINEAR
    """
    def __init__(self,
                 size,
                 scale=(0.08, 1.0),
                 ratio=(3. / 4., 4. / 3.),
                 interpolation=Image.BILINEAR):
        if isinstance(size, (tuple, list)):
            self.size = size
        else:
            self.size = (size, size)
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            warnings.warn("range should be of kind (min, max)")

        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img, scale, ratio):
        """Get parameters for ``crop`` for a random sized crop.

        Args:
            img (PIL Image): Image to be cropped.
            scale (tuple): range of size of the origin size cropped
            ratio (tuple): range of aspect ratio of the origin aspect ratio cropped

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
            sized crop.
        """
        width, height = _get_image_size(img)
        area = height * width

        for _ in range(10):
            target_area = random.uniform(*scale) * area
            log_ratio = (math.log(ratio[0]), math.log(ratio[1]))
            aspect_ratio = math.exp(random.uniform(*log_ratio))

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < w <= width and 0 < h <= height:
                i = random.randint(0, height - h)
                j = random.randint(0, width - w)
                return i, j, h, w

        # Fallback to central crop
        in_ratio = float(width) / float(height)
        if in_ratio < min(ratio):
            w = width
            h = int(round(w / min(ratio)))
        elif in_ratio > max(ratio):
            h = height
            w = int(round(h * max(ratio)))
        else:  # whole image
            w = width
            h = height
        i = (height - h) // 2
        j = (width - w) // 2
        return i, j, h, w

    def __call__(self, img, mask):
        """
        Args:
            img (PIL Image): Image to be cropped and resized.
            mask (PIL Image): Mask to be cropped and resized with the same parameters.

        Returns:
            (PIL Image, PIL Image): Randomly cropped and resized image and mask.
        """
        i, j, h, w = self.get_params(img, self.scale, self.ratio)
        img = TF.resized_crop(img, i, j, h, w, self.size, self.interpolation)
        mask = TF.resized_crop(mask, i, j, h, w, self.size, Image.NEAREST)
        return img, mask

    def __repr__(self):
        interpolate_str = _pil_interpolation_to_str[self.interpolation]
        format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
        format_string += ', scale={0}'.format(
            tuple(round(s, 4) for s in self.scale))
        format_string += ', ratio={0}'.format(
            tuple(round(r, 4) for r in self.ratio))
        format_string += ', interpolation={0})'.format(interpolate_str)
        return format_string
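

# Usage sketch (an illustrative addition): the crop box is sampled once and
# reused for both inputs; only the interpolation differs (BILINEAR for the
# image, NEAREST for the mask).
#
#   from PIL import Image
#   rrc = RandomResizedCrop(size=32, scale=(0.5, 1.0))
#   img, seg = rrc(Image.new('RGB', (64, 64)), Image.new('L', (64, 64)))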


class ToOnehot(object):
    """Convert a label mask to a one-hot tensor.

    Args:
        max_obj_n (int): Maximum number of objects to keep.
        shuffle (bool): Whether to shuffle the object order before truncating
            to ``max_obj_n`` objects.
    """
    def __init__(self, max_obj_n, shuffle):
        self.max_obj_n = max_obj_n
        self.shuffle = shuffle

    def __call__(self, mask, obj_list=None):
        """
        Args:
            mask (numpy array): Mask to be converted.

        Returns:
            (Tensor, list): Converted mask in one-hot format and the list of
            retained object ids.
        """
        new_mask = np.zeros((self.max_obj_n + 1, *mask.shape), np.uint8)

        if not obj_list:
            obj_list = list()
            obj_max = mask.max() + 1
            for i in range(1, obj_max):
                tmp = (mask == i).astype(np.uint8)
                if tmp.max() > 0:
                    obj_list.append(i)
            if self.shuffle:
                random.shuffle(obj_list)
            obj_list = obj_list[:self.max_obj_n]

        for i in range(len(obj_list)):
            new_mask[i + 1] = (mask == obj_list[i]).astype(np.uint8)
        # Channel 0 is the background: everything not covered by an object channel.
        new_mask[0] = 1 - np.sum(new_mask, axis=0)

        return torch.from_numpy(new_mask), obj_list

    def __repr__(self):
        return self.__class__.__name__ + '(max_obj_n={})'.format(
            self.max_obj_n)
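

# Usage sketch (an illustrative addition): a 4x4 mask containing one object id
# becomes a (max_obj_n + 1, 4, 4) tensor whose channel 0 is the background.
#
#   m = np.zeros((4, 4), np.uint8)
#   m[1:3, 1:3] = 1
#   onehot, objs = ToOnehot(max_obj_n=3, shuffle=False)(m)
#   assert onehot.shape == (4, 4, 4) and objs == [1]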


class Resize(torch.nn.Module):
    """Resize the input image to the given size.

    The image can be a PIL Image or a torch Tensor, in which case it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), output size will be matched to this. If size is an int,
            smaller edge of the image will be matched to this number,
            i.e. if height > width, then image will be rescaled to
            (size * height / width, size).
            In torchscript mode padding as single int is not supported, use a tuple or
            list of length 1: ``[size, ]``.
        interpolation (int, optional): Desired interpolation enum defined by `filters`_.
            Default is ``PIL.Image.BILINEAR``. If input is Tensor, only ``PIL.Image.NEAREST``, ``PIL.Image.BILINEAR``
            and ``PIL.Image.BICUBIC`` are supported.
    """
    def __init__(self, size, interpolation=Image.BILINEAR):
        super().__init__()
        if not isinstance(size, (int, Sequence)):
            raise TypeError("Size should be int or sequence. Got {}".format(
                type(size)))
        if isinstance(size, Sequence) and len(size) not in (1, 2):
            raise ValueError(
                "If size is a sequence, it should have 1 or 2 values")
        self.size = size
        self.interpolation = interpolation

    def forward(self, img, mask):
        """
        Args:
            img (PIL Image or Tensor): Image to be scaled.
            mask (PIL Image or Tensor): Mask to be scaled to the same size.

        Returns:
            (PIL Image or Tensor, PIL Image or Tensor): Rescaled image and mask.
        """
        img = TF.resize(img, self.size, self.interpolation)
        mask = TF.resize(mask, self.size, Image.NEAREST)
        return img, mask

    def __repr__(self):
        interpolate_str = _pil_interpolation_to_str[self.interpolation]
        return self.__class__.__name__ + '(size={0}, interpolation={1})'.format(
            self.size, interpolate_str)
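

if __name__ == '__main__':
    # Minimal smoke test (an illustrative addition, not from the original file;
    # it assumes the pre-0.9 torchvision functional API that the module itself
    # uses, e.g. the resample/fillcolor keywords of TF.affine).
    img = Image.new('RGB', (80, 60))
    mask = Image.new('L', (80, 60))
    for t in (Resize(64), RandomHorizontalFlip(), RandomVerticalFlip(),
              RandomAffine(degrees=10), RandomResizedCrop(48)):
        img, mask = t(img, mask)
    print(img.size, mask.size)  # expected: (48, 48) (48, 48)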