# -*- coding: utf-8 -*-
# @Author : xuelun
import cv2
import torch
import numpy as np

# ------------
# DATA TOOLS
# ------------

def imread_gray(path, augment_fn=None):
    if augment_fn is None:
        image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    else:
        image = cv2.imread(str(path), cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = augment_fn(image)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return image  # (h, w)

def imread_color(path, augment_fn=None):
    image = cv2.imread(str(path), cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if augment_fn is not None:
        image = augment_fn(image)
    return image  # (h, w, 3)
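
# Usage sketch (illustrative; the lambda below is an assumed augmentation, not
# part of this module): `augment_fn` is any callable mapping an RGB uint8 array
# to an RGB uint8 array, applied in RGB space by both readers, e.g.
#   img = imread_color(path, augment_fn=lambda x: np.clip(x * 1.2, 0, 255).astype(np.uint8))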

def get_resized_wh(w, h, resize=None):
    if resize is not None:  # resize the longer edge
        scale = resize / max(h, w)
        w_new, h_new = int(round(w * scale)), int(round(h * scale))
    else:
        w_new, h_new = w, h
    return w_new, h_new
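
# Worked example: get_resized_wh(1920, 1080, 840) scales by 840/1920 = 0.4375,
# giving (840, 472) -- 1080 * 0.4375 = 472.5 rounds to 472 under Python's
# banker's rounding.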

def get_divisible_wh(w, h, df=None):
    if df is not None:
        w_new = max((w // df), 1) * df
        h_new = max((h // df), 1) * df
        # resize = int(max(max(w, h) // df, 1) * df)
        # w_new, h_new = get_resized_wh(w, h, resize)
        # scale = resize / x
        # w_new, h_new = map(lambda x: int(max(x // df, 1) * df), [w, h])
    else:
        w_new, h_new = w, h
    return w_new, h_new
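
# Worked example: get_divisible_wh(838, 471, df=8) -> (832, 464); both dims are
# rounded *down* to the nearest multiple of df, so the result never exceeds the
# input (and never drops below df itself).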

def pad_bottom_right(inp, pad_size, ret_mask=False):
    # Spatial dims are the first two axes ((h, w) or (h, w, c)), so check shape[:2].
    assert isinstance(pad_size, int) and pad_size >= max(inp.shape[:2]), f"{pad_size} < {max(inp.shape[:2])}"
    mask = None
    if inp.ndim == 2:
        padded = np.zeros((pad_size, pad_size), dtype=inp.dtype)
        padded[:inp.shape[0], :inp.shape[1]] = inp
    elif inp.ndim == 3:
        padded = np.zeros((pad_size, pad_size, inp.shape[-1]), dtype=inp.dtype)
        padded[:inp.shape[0], :inp.shape[1]] = inp
    else:
        raise NotImplementedError()
    if ret_mask:
        mask = np.zeros((pad_size, pad_size), dtype=bool)
        mask[:inp.shape[0], :inp.shape[1]] = True
    return padded, mask
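
# Usage sketch (hypothetical shapes): pad a 480x640 grayscale image into an
# 800x800 canvas; the mask marks which pixels are real image content.
#   img = np.ones((480, 640), dtype=np.uint8)
#   padded, mask = pad_bottom_right(img, 800, ret_mask=True)
#   # padded.shape == (800, 800); mask[:480, :640] is True, everything else False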

def split(n, k):
    # Split integer n into k parts that sum to n and differ by at most 1.
    d, r = divmod(n, k)
    return [d + 1] * r + [d] * (k - r)
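
# Worked example: split(10, 3) -> [4, 3, 3]; divmod(10, 3) = (3, 1), so one
# part takes the extra remainder unit and the rest keep the base size d = 3.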

def read_images(path, max_resize, df, padding, augment_fn=None, image=None):
    """
    Args:
        path (str): image file path
        max_resize (int): max image size after resize
        df (int, optional): image size division factor.
            NOTE: this will change the final image size after resizing.
        padding (bool): if True, zero-pad the resized images to a square of side max_resize.
        augment_fn (callable, optional): augments images with pre-defined visual effects.
        image (ndarray, optional): RGB image; if given, skips reading from path.
    Returns:
        gray (torch.tensor): (1, h, w)
        image (torch.tensor): (3, h, w)
        scale (torch.tensor): [w/w_new, h/h_new]
        resize (list): [h_new, w_new]
        mask (torch.tensor): (h, w), or None when padding is False
    """
    # read image
    assert max_resize is not None
    image = imread_color(path, augment_fn) if image is None else image  # (h, w, 3), RGB
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  # (h, w)

    # resize image so that max(w, h) <= max_resize
    w, h = image.shape[1], image.shape[0]
    if max(w, h) > max_resize:
        w_new, h_new = get_resized_wh(w, h, max_resize)  # scale the longer edge to max_resize
    else:
        w_new, h_new = w, h
    w_new, h_new = get_divisible_wh(w_new, h_new, df)  # round both dims down to a multiple of df (stays <= max_resize)
    image = cv2.resize(image, (w_new, h_new))  # (h', w', 3)
    gray = cv2.resize(gray, (w_new, h_new))  # (h', w')
    scale = torch.tensor([w / w_new, h / h_new], dtype=torch.float)

    # padding
    mask = None
    if padding:
        image, _ = pad_bottom_right(image, max_resize, ret_mask=False)
        gray, mask = pad_bottom_right(gray, max_resize, ret_mask=True)
        mask = torch.from_numpy(mask)

    gray = torch.from_numpy(gray).float()[None] / 255  # (1, h, w)
    image = torch.from_numpy(image).float() / 255  # (h, w, 3)
    image = image.permute(2, 0, 1)  # (3, h, w)
    resize = [h_new, w_new]
    return gray, image, scale, resize, mask
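
if __name__ == "__main__":
    # Minimal end-to-end sketch (illustrative; 'demo.jpg' is a hypothetical path):
    # resize so the longer edge is at most 832, snap both dims to multiples of 8,
    # then pad to an 832x832 square with a validity mask.
    gray, image, scale, resize, mask = read_images("demo.jpg", max_resize=832, df=8, padding=True)
    print(gray.shape)   # torch.Size([1, 832, 832])
    print(image.shape)  # torch.Size([3, 832, 832])
    print(scale, resize, mask.shape)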