# LambdaSuperRes / KAIR/utils/utils_blindsr.py
# (repository page header: cooperll, "LambdaSuperRes initial commit", 2514fb4, 21.2 kB)
# -*- coding: utf-8 -*-
import numpy as np
import cv2
import torch
from utils import utils_image as util
import random
from scipy import ndimage
import scipy
import scipy.stats as ss
from scipy.interpolate import interp2d
from scipy.linalg import orth
"""
# --------------------------------------------
# super-resolution
# --------------------------------------------
#
# kai zhang ([email protected])
# https://github.com/cszn
# from 2019/03--2021/08
# --------------------------------------------
"""
def modcrop_np(img, sf):
    """Crop an image so its two leading dimensions are multiples of ``sf``.

    Args:
        img: numpy image whose first two axes are spatial (an optional
            trailing channel axis is kept as is).
        sf: scale factor.

    Returns:
        A cropped copy of ``img``; the input array is not modified.
    """
    dim0, dim1 = img.shape[:2]
    keep0 = dim0 - dim0 % sf
    keep1 = dim1 - dim1 % sf
    # Copy first so the returned array never aliases the caller's data.
    return np.copy(img)[:keep0, :keep1, ...]
"""
# --------------------------------------------
# anisotropic gaussian kernels
# --------------------------------------------
"""
def analytic_kernel(k):
    """Calculate the x4 kernel from the x2 kernel (for proof see appendix in paper).

    Args:
        k: square 2-D numpy kernel of shape (k_size, k_size).

    Returns:
        The cropped big kernel, normalised to sum to 1.
    """
    k_size = k.shape[0]
    # Size of the big kernel before cropping.
    side = 3 * k_size - 2
    big_k = np.zeros((side, side))
    # Accumulate a copy of k, weighted by k[r, c], at stride 2.
    for r in range(k_size):
        for c in range(k_size):
            big_k[2 * r:2 * r + k_size, 2 * c:2 * c + k_size] += k[r, c] * k
    # Crop the edges of the big kernel to drop very small values (smaller
    # kernel -> cheaper SR). Explicit end indices are required: with
    # k_size == 1 the crop is 0 and a ``[0:-0]`` slice would be empty.
    crop = k_size // 2
    cropped_big_k = big_k[crop:side - crop, crop:side - crop]
    # Normalize to 1.
    return cropped_big_k / cropped_big_k.sum()
def anisotropic_gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
    """Generate an anisotropic Gaussian kernel.

    Args:
        ksize: kernel size, e.g. 15.
        theta: rotation angle, in [0, pi].
        l1: scaling of the first eigenvalue, [0.1, 50].
        l2: scaling of the second eigenvalue, [0.1, l1].

    If l1 == l2 the kernel is isotropic.

    Returns:
        k: the (ksize x ksize) kernel produced by ``gm_blur_kernel``.
    """
    rotation = np.array([[np.cos(theta), -np.sin(theta)],
                         [np.sin(theta), np.cos(theta)]])
    # Principal direction: the unit x vector rotated by theta.
    axis = np.dot(rotation, np.array([1., 0.]))
    basis = np.array([[axis[0], axis[1]], [axis[1], -axis[0]]])
    eigvals = np.array([[l1, 0], [0, l2]])
    # Covariance assembled as basis * diag(l1, l2) * basis^-1.
    cov = np.dot(np.dot(basis, eigvals), np.linalg.inv(basis))
    return gm_blur_kernel(mean=[0, 0], cov=cov, size=ksize)
def gm_blur_kernel(mean, cov, size=15):
    """Sample a 2-D Gaussian density on a ``size`` x ``size`` grid.

    Args:
        mean: length-2 mean of the Gaussian, in (x, y) order.
        cov: 2x2 covariance matrix.
        size: side length of the kernel.

    Returns:
        A (size, size) array indexed as k[y, x], normalised to sum to 1.
    """
    # Pixel coordinates relative to the grid centre (size - 1) / 2.
    offsets = np.arange(size) - (size - 1) / 2.0
    cx, cy = np.meshgrid(offsets, offsets)  # cx[y, x] = offsets[x], cy[y, x] = offsets[y]
    points = np.stack([cx, cy], axis=-1)
    # One vectorised pdf call instead of size**2 separate calls that each
    # re-process the same covariance matrix.
    density = ss.multivariate_normal.pdf(points, mean=mean, cov=cov)
    # pdf squeezes singleton dimensions, so restore the 2-D shape explicitly.
    k = np.asarray(density, dtype=np.float64).reshape(size, size)
    return k / np.sum(k)
def shift_pixel(x, sf, upper_left=True):
    """Shift pixels for super-resolution with different scale factors.

    The image is resampled with bilinear interpolation at positions offset
    by ``(sf - 1) / 2`` pixels along both spatial axes; sample positions are
    clamped to the image border.

    Args:
        x: array of shape (H, W) or (H, W, C).
        sf: scale factor.
        upper_left: shift direction; True samples at ``index + shift``,
            False at ``index - shift``.

    Returns:
        A new array of the same shape. 2-D inputs yield float64; 3-D inputs
        are cast back to the input dtype. The input is never modified.
        Arrays of any other rank are returned unchanged.
    """
    if x.ndim not in (2, 3):
        return x

    h, w = x.shape[:2]
    shift = (sf - 1) * 0.5
    if not upper_left:
        shift = -shift
    cols = np.clip(np.arange(0, w, 1.0) + shift, 0, w - 1)
    rows = np.clip(np.arange(0, h, 1.0) + shift, 0, h - 1)

    # Integer neighbours and fractional weights for each sample position.
    c0 = np.floor(cols).astype(np.intp)
    r0 = np.floor(rows).astype(np.intp)
    c1 = np.minimum(c0 + 1, w - 1)
    r1 = np.minimum(r0 + 1, h - 1)
    trailing = (1,) * (x.ndim - 2)  # lets the weights broadcast over channels
    fc = (cols - c0).reshape((1, w) + trailing)
    fr = (rows - r0).reshape((h, 1) + trailing)

    src = x.astype(np.float64)
    # Bilinear interpolation is separable: columns first, then rows.
    along_cols = src[:, c0] * (1.0 - fc) + src[:, c1] * fc
    out = along_cols[r0] * (1.0 - fr) + along_cols[r1] * fr

    if x.ndim == 3:
        out = out.astype(x.dtype, copy=False)
    return out
def blur(x, k):
    """Blur every image of a batch with its own kernel.

    Args:
        x: image tensor, N x C x H x W.
        k: kernel tensor, N x 1 x h x w (the same kernel is applied to all
            channels of an image).

    Returns:
        Blurred tensor, N x C x H x W for odd kernel sizes. Borders are
        handled with replicate padding.
    """
    n, c = x.shape[:2]
    pad_h, pad_w = (k.shape[-2] - 1) // 2, (k.shape[-1] - 1) // 2
    # F.pad takes (left, right, top, bottom): the width padding comes first.
    # Each axis must use its own half-size or non-square kernels change the
    # output shape.
    x = torch.nn.functional.pad(x, pad=(pad_w, pad_w, pad_h, pad_h), mode='replicate')
    k = k.repeat(1, c, 1, 1)
    k = k.reshape(-1, 1, k.shape[2], k.shape[3])
    # Fold batch and channels together so one grouped conv applies a
    # distinct kernel to every (image, channel) plane.
    x = x.reshape(1, -1, x.shape[2], x.shape[3])
    x = torch.nn.functional.conv2d(x, k, bias=None, stride=1, padding=0, groups=n * c)
    return x.reshape(n, c, x.shape[2], x.shape[3])
def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var=0.6, max_var=10., noise_level=0):
    """Generate a random anisotropic Gaussian kernel.

    Modified version of https://github.com/assafshocher/BlindSR_dataset_generator
    (Kai Zhang).

    Args:
        k_size: two-element (x, y) kernel size.
        scale_factor: two-element scale factor, used to shift the kernel
            centre for aligned images.
        min_var, max_var: the two eigenvalues of the covariance are sampled
            uniformly in [min_var, max_var]
            (e.g. min_var = 0.175 * sf, max_var = 2.5 * sf).
        noise_level: amplitude of the multiplicative uniform noise,
            sampled in [-noise_level, noise_level].

    Returns:
        Kernel of shape (k_size[1], k_size[0]) normalised to sum to 1.
    """
    # Accept plain sequences as well as arrays; the defaults are never mutated.
    k_size = np.asarray(k_size)
    scale_factor = np.asarray(scale_factor)

    # Set random eigen-vals (lambdas) and angle (theta) for COV matrix
    lambda_1 = min_var + np.random.rand() * (max_var - min_var)
    lambda_2 = min_var + np.random.rand() * (max_var - min_var)
    theta = np.random.rand() * np.pi  # random theta
    # The sampling grid below is indexed (y, x), so the noise must be
    # (k_size[1], k_size[0]) to line up for non-square kernels.
    noise = -noise_level + np.random.rand(int(k_size[1]), int(k_size[0])) * noise_level * 2

    # Set COV matrix using lambdas and theta
    eigvals = np.diag([lambda_1, lambda_2])
    Q = np.array([[np.cos(theta), -np.sin(theta)],
                  [np.sin(theta), np.cos(theta)]])
    sigma = Q @ eigvals @ Q.T
    inv_sigma = np.linalg.inv(sigma)[None, None, :, :]

    # Set expectation position (shifting kernel for aligned image)
    mu = k_size // 2 - 0.5 * (scale_factor - 1)
    mu = mu[None, None, :, None]

    # Create meshgrid for Gaussian
    X, Y = np.meshgrid(range(k_size[0]), range(k_size[1]))
    Z = np.stack([X, Y], 2)[:, :, :, None]

    # Calculate the Gaussian for every pixel of the kernel
    ZZ = Z - mu
    ZZ_t = ZZ.transpose(0, 1, 3, 2)
    # Index the trailing 1x1 matrix explicitly; np.squeeze would also drop
    # spatial axes of length 1.
    exponent = (ZZ_t @ inv_sigma @ ZZ)[..., 0, 0]
    raw_kernel = np.exp(-0.5 * exponent) * (1 + noise)

    # Normalize the kernel and return
    return raw_kernel / np.sum(raw_kernel)
def fspecial_gaussian(hsize, sigma):
    """Square Gaussian low-pass filter, in the style of MATLAB's fspecial.

    Args:
        hsize: side length of the (hsize x hsize) filter.
        sigma: standard deviation of the Gaussian.

    Returns:
        (hsize, hsize) array normalised to sum to 1 (when the sum is non-zero).
    """
    half = (hsize - 1.0) / 2.0
    axis = np.arange(-half, half + 1)
    x, y = np.meshgrid(axis, axis)
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    # Zero out values that are negligible relative to the peak.
    # np.finfo replaces the deprecated scipy.finfo alias.
    h[h < np.finfo(float).eps * h.max()] = 0
    total = h.sum()
    if total != 0:
        h = h / total
    return h
def fspecial_laplacian(alpha):
    """3x3 Laplacian filter controlled by ``alpha``.

    Args:
        alpha: shape parameter; values outside [0, 1] are clamped.

    Returns:
        A 3x3 numpy array.
    """
    upper_bounded = min([alpha, 1])
    alpha = max([0, upper_bounded])
    denom = alpha + 1
    corner = alpha / denom
    edge = (1 - alpha) / denom
    centre = -4 / denom
    rows = [
        [corner, edge, corner],
        [edge, centre, edge],
        [corner, edge, corner],
    ]
    return np.array(rows)
def fspecial(filter_type, *args, **kwargs):
    """Build a filter by name, forwarding the remaining arguments.

    Python code adapted from:
    https://github.com/ronaldosena/imagens-medicas-2/blob/40171a6c259edec7827a6693a93955de2bd39e76/Aulas/aula_2_-_uniform_filter/matlab_fspecial.py

    Args:
        filter_type: 'gaussian' or 'laplacian'.
        *args, **kwargs: passed to the selected filter builder.

    Returns:
        The filter array, or None when ``filter_type`` is not recognised.
    """
    builder = None
    if filter_type == 'laplacian':
        builder = fspecial_laplacian
    if filter_type == 'gaussian':
        builder = fspecial_gaussian
    if builder is None:
        return None
    return builder(*args, **kwargs)
"""
# --------------------------------------------
# degradation models
# --------------------------------------------
"""
def bicubic_degradation(x, sf=3):
    """Downsample an image by ``sf`` with ``util.imresize_np``.

    Args:
        x: HxWxC image, [0, 1].
        sf: down-scale factor.

    Returns:
        The bicubicly downsampled LR image.
    """
    return util.imresize_np(x, scale=1 / sf)
def srmd_degradation(x, k, sf=3):
    """Blur + bicubic downsampling.

    Args:
        x: HxWxC image, [0, 1].
        k: hxw blur kernel, double.
        sf: down-scale factor.

    Returns:
        Downsampled LR image.

    Reference:
        @inproceedings{zhang2018learning,
          title={Learning a single convolutional super-resolution network for multiple degradations},
          author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
          booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
          pages={3262--3271},
          year={2018}
        }
    """
    # The kernel gets a singleton channel axis so every channel is blurred
    # independently. Uses ndimage.convolve directly because the
    # ndimage.filters namespace is deprecated.
    # Alternative boundary modes: 'nearest' | 'mirror'.
    x = ndimage.convolve(x, np.expand_dims(k, axis=2), mode='wrap')
    return bicubic_degradation(x, sf=sf)
def dpsr_degradation(x, k, sf=3):
    """Bicubic downsampling + blur.

    Args:
        x: HxWxC image, [0, 1].
        k: hxw blur kernel, double.
        sf: down-scale factor.

    Returns:
        Downsampled LR image.

    Reference:
        @inproceedings{zhang2019deep,
          title={Deep Plug-and-Play Super-Resolution for Arbitrary Blur Kernels},
          author={Zhang, Kai and Zuo, Wangmeng and Zhang, Lei},
          booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
          pages={1671--1681},
          year={2019}
        }
    """
    x = bicubic_degradation(x, sf=sf)
    # The kernel gets a singleton channel axis so every channel is blurred
    # independently. Uses ndimage.convolve directly because the
    # ndimage.filters namespace is deprecated.
    return ndimage.convolve(x, np.expand_dims(k, axis=2), mode='wrap')
def classical_degradation(x, k, sf=3):
    """Blur + downsampling by keeping every ``sf``-th pixel.

    Args:
        x: HxWxC (or HxW) image, [0, 1]/[0, 255].
        k: hxw blur kernel, double.
        sf: down-scale factor.

    Returns:
        Downsampled LR image.
    """
    # A 3-D image needs a kernel with a singleton channel axis; a 2-D
    # (grayscale) image is convolved with the 2-D kernel as is.
    kernel = k if x.ndim == 2 else np.expand_dims(k, axis=2)
    # Uses ndimage.convolve directly because the ndimage.filters namespace
    # is deprecated.
    x = ndimage.convolve(x, kernel, mode='wrap')
    st = 0  # offset of the first retained pixel
    return x[st::sf, st::sf, ...]
def add_sharpening(img, weight=0.5, radius=50, threshold=10):
    """USM sharpening. Borrowed from Real-ESRGAN.

    Input image: I; blurry image: B.
        1. K = I + weight * (I - B)
        2. mask = 1 if abs(I - B) * 255 > threshold, else 0
        3. soft_mask = Gaussian blur of mask
        4. out = soft_mask * K + (1 - soft_mask) * I

    Args:
        img (numpy array): input image, HWC, BGR; float32, [0, 1].
        weight (float): sharpening weight. Default: 0.5.
        radius (int): kernel size of the Gaussian blur; an even value is
            bumped to the next odd number. Default: 50.
        threshold (int): residual threshold on the 0-255 scale. Default: 10.

    Returns:
        The sharpened image.
    """
    if radius % 2 == 0:
        radius += 1
    # Named ``blurred`` so the module-level ``blur`` function is not shadowed.
    blurred = cv2.GaussianBlur(img, (radius, radius), 0)
    residual = img - blurred
    mask = (np.abs(residual) * 255 > threshold).astype('float32')
    soft_mask = cv2.GaussianBlur(mask, (radius, radius), 0)

    K = np.clip(img + weight * residual, 0, 1)
    return soft_mask * K + (1 - soft_mask) * img
def add_blur(img, sf=4):
    """Blur an image with a randomly drawn Gaussian kernel.

    With probability 0.5 the kernel is anisotropic (random angle, both
    eigenvalue scalings drawn in [0, 4 + sf)); otherwise it is an isotropic
    ``fspecial`` Gaussian with sigma drawn in [0, 2 + 0.2 * sf). The kernel
    size is a random odd number in [7, 25].

    Args:
        img: HxWxC image.
        sf: scale factor; controls the blur strength ranges.

    Returns:
        The blurred image (mirror boundary handling).
    """
    wd2 = 4.0 + sf
    wd = 2.0 + 0.2 * sf
    if random.random() < 0.5:
        l1 = wd2 * random.random()
        l2 = wd2 * random.random()
        k = anisotropic_gaussian(ksize=2 * random.randint(2, 11) + 3, theta=random.random() * np.pi, l1=l1, l2=l2)
    else:
        k = fspecial('gaussian', 2 * random.randint(2, 11) + 3, wd * random.random())
    # Uses ndimage.convolve directly because the ndimage.filters namespace
    # is deprecated.
    return ndimage.convolve(img, np.expand_dims(k, axis=2), mode='mirror')
def add_resize(img, sf=4):
    """Randomly rescale an image and clip it to [0, 1].

    A single uniform draw picks the direction: above 0.8 the image is
    upscaled by a factor in [1, 2]; below 0.7 it is downscaled by a factor
    in [0.5 / sf, 1]; otherwise the size is kept. The OpenCV interpolation
    flag is chosen at random from 1, 2, 3.

    Args:
        img: HxWxC image.
        sf: scale factor bounding the strongest downscale.

    Returns:
        The resized, clipped image.
    """
    draw = np.random.rand()
    if draw > 0.8:  # up
        ratio = random.uniform(1, 2)
    elif draw < 0.7:  # down
        ratio = random.uniform(0.5 / sf, 1)
    else:
        ratio = 1.0
    target = (int(ratio * img.shape[1]), int(ratio * img.shape[0]))  # cv2 expects (width, height)
    resized = cv2.resize(img, target, interpolation=random.choice([1, 2, 3]))
    return np.clip(resized, 0.0, 1.0)
def add_gaussian_noise(img, noise_level1=2, noise_level2=25):
    """Add Gaussian noise of a random kind and clip the result to [0, 1].

    One uniform draw selects the noise type:
      * > 0.6: independent per-channel ("color") noise,
      * < 0.4: one noise plane shared by all channels ("grayscale"),
      * otherwise: channel-correlated noise with a random covariance.

    Args:
        img: HxWxC image, [0, 1].
        noise_level1, noise_level2: inclusive range (0-255 scale) from which
            the noise standard deviation is drawn.

    Returns:
        A new noisy image; the input array is left untouched.
    """
    noise_level = random.randint(noise_level1, noise_level2)
    rnum = np.random.rand()
    if rnum > 0.6:  # add color Gaussian noise
        noise = np.random.normal(0, noise_level / 255.0, img.shape)
    elif rnum < 0.4:  # add grayscale Gaussian noise
        noise = np.random.normal(0, noise_level / 255.0, (*img.shape[:2], 1))
    else:  # add channel-correlated noise
        channels = img.shape[2]  # sized from the image rather than fixed at 3
        scale = noise_level2 / 255.
        D = np.diag(np.random.rand(channels))
        U = orth(np.random.rand(channels, channels))
        conv = np.dot(np.dot(np.transpose(U), D), U)
        noise = np.random.multivariate_normal(np.zeros(channels), np.abs(scale**2 * conv), img.shape[:2])
    # Out-of-place addition: ``img += ...`` would silently modify the
    # caller's array.
    return np.clip(img + noise.astype(np.float32), 0.0, 1.0)
def add_speckle_noise(img, noise_level1=2, noise_level2=25):
    """Add multiplicative (speckle) noise and clip the result to [0, 1].

    The image is clipped to [0, 1], then ``img * factor`` is added, where
    ``factor`` is Gaussian noise that is per-channel (draw > 0.6), shared
    across channels (draw < 0.4), or channel-correlated (otherwise).

    Args:
        img: HxWxC image.
        noise_level1, noise_level2: inclusive range (0-255 scale) from which
            the noise standard deviation is drawn.

    Returns:
        The noisy image.
    """
    noise_level = random.randint(noise_level1, noise_level2)
    img = np.clip(img, 0.0, 1.0)
    selector = random.random()
    sigma = noise_level / 255.0
    if selector > 0.6:
        factor = np.random.normal(0, sigma, img.shape)
    elif selector < 0.4:
        factor = np.random.normal(0, sigma, (*img.shape[:2], 1))
    else:
        cov_scale = noise_level2 / 255.
        diag = np.diag(np.random.rand(3))
        basis = orth(np.random.rand(3, 3))
        cov = np.dot(np.dot(np.transpose(basis), diag), basis)
        factor = np.random.multivariate_normal([0, 0, 0], np.abs(cov_scale**2 * cov), img.shape[:2])
    # In-place on the clipped copy created above, not on the caller's array.
    img += img * factor.astype(np.float32)
    return np.clip(img, 0.0, 1.0)
def add_poisson_noise(img):
    """Add Poisson noise and clip the result to [0, 1].

    The image is first quantised to 1/255 steps. With probability 0.5 every
    value is replaced by a scaled Poisson sample; otherwise Poisson noise is
    computed on a weighted gray version of the first three channels and the
    same noise plane is added to all channels.

    Args:
        img: HxWxC image, [0, 1].

    Returns:
        The noisy image.
    """
    quantised = np.clip((img * 255.0).round(), 0, 255) / 255.
    vals = 10**(2 * random.random() + 2.0)  # exponent lies in [2, 4]
    if random.random() < 0.5:
        noisy = np.random.poisson(quantised * vals).astype(np.float32) / vals
    else:
        gray = np.dot(quantised[..., :3], [0.299, 0.587, 0.114])
        gray = np.clip((gray * 255.0).round(), 0, 255) / 255.
        gray_noise = np.random.poisson(gray * vals).astype(np.float32) / vals - gray
        noisy = quantised
        noisy += gray_noise[:, :, np.newaxis]  # in place, keeps the image dtype
    return np.clip(noisy, 0.0, 1.0)
def add_jpeg_noise(img):
    """Add JPEG compression artifacts with a random quality factor.

    The image is converted to uint8 BGR, encoded to JPEG in memory with a
    quality drawn from [30, 95], decoded, and converted back through
    ``util.uint2single`` to RGB.

    Args:
        img: HxWx3 RGB image in the format ``util.single2uint`` accepts.

    Returns:
        The JPEG-compressed image.
    """
    quality_factor = random.randint(30, 95)
    img = cv2.cvtColor(util.single2uint(img), cv2.COLOR_RGB2BGR)
    # imencode returns (success flag, buffer); only the buffer is needed.
    _, encimg = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), quality_factor])
    img = cv2.imdecode(encimg, 1)
    return cv2.cvtColor(util.uint2single(img), cv2.COLOR_BGR2RGB)
def random_crop(lq, hq, sf=4, lq_patchsize=64):
    """Cut a random low-quality patch and the matching high-quality patch.

    Args:
        lq: low-quality image, HxWxC.
        hq: high-quality image, expected to be ``sf`` times larger than ``lq``.
        sf: scale factor between ``lq`` and ``hq``.
        lq_patchsize: side length of the low-quality patch.

    Returns:
        (lq_patch, hq_patch); the hq patch has side ``lq_patchsize * sf`` and
        starts at the lq offset multiplied by ``sf``.
    """
    lq_h, lq_w = lq.shape[:2]
    top = random.randint(0, lq_h - lq_patchsize)
    left = random.randint(0, lq_w - lq_patchsize)
    lq_patch = lq[top:top + lq_patchsize, left:left + lq_patchsize, :]

    hq_top, hq_left = int(top * sf), int(left * sf)
    hq_side = lq_patchsize * sf
    hq_patch = hq[hq_top:hq_top + hq_side, hq_left:hq_left + hq_side, :]
    return lq_patch, hq_patch
def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None):
    """
    This is the degradation model of BSRGAN from the paper
    "Designing a Practical Degradation Model for Deep Blind Image Super-Resolution"
    ----------
    img: HxWxC, [0, 1], its size should be larger than (lq_patchsize x sf) x (lq_patchsize x sf)
    sf: scale factor
    lq_patchsize: side length of the returned low-quality patch
    isp_model: camera ISP model; when given, it is applied with probability 0.25

    Returns
    -------
    img: low-quality patch, size: lq_patchsize x lq_patchsize x C, range: [0, 1]
    hq: corresponding high-quality patch, size: (lq_patchsize x sf) x (lq_patchsize x sf) x C, range: [0, 1]

    Raises
    ------
    ValueError: if the mod-cropped image is smaller than lq_patchsize * sf
        along either spatial axis.
    """
    isp_prob, jpeg_prob, scale2_prob = 0.25, 0.9, 0.25
    sf_ori = sf

    h1, w1 = img.shape[:2]
    # Mod crop: rows are bounded by the height and columns by the width.
    img = img.copy()[:h1 - h1 % sf, :w1 - w1 % sf, ...]
    h, w = img.shape[:2]

    if h < lq_patchsize * sf or w < lq_patchsize * sf:
        raise ValueError(f'img size ({h1}x{w1}) is too small!')

    hq = img.copy()

    if sf == 4 and random.random() < scale2_prob:  # downsample1
        if np.random.rand() < 0.5:
            img = cv2.resize(img, (int(1/2*img.shape[1]), int(1/2*img.shape[0])), interpolation=random.choice([1, 2, 3]))
        else:
            img = util.imresize_np(img, 1/2, True)
        img = np.clip(img, 0.0, 1.0)
        # Half of the x4 reduction is done; the remaining steps work at x2.
        sf = 2

    shuffle_order = random.sample(range(7), 7)
    idx1, idx2 = shuffle_order.index(2), shuffle_order.index(3)
    if idx1 > idx2:  # keep downsample3 after downsample2
        shuffle_order[idx1], shuffle_order[idx2] = shuffle_order[idx2], shuffle_order[idx1]

    for i in shuffle_order:

        if i == 0:
            img = add_blur(img, sf=sf)

        elif i == 1:
            img = add_blur(img, sf=sf)

        elif i == 2:
            # Remember the pre-downsample size; downsample3 derives its
            # target size from it.
            a, b = img.shape[1], img.shape[0]
            # downsample2
            if random.random() < 0.75:
                sf1 = random.uniform(1, 2*sf)
                img = cv2.resize(img, (int(1/sf1*img.shape[1]), int(1/sf1*img.shape[0])), interpolation=random.choice([1, 2, 3]))
            else:
                k = fspecial('gaussian', 25, random.uniform(0.1, 0.6*sf))
                k_shifted = shift_pixel(k, sf)
                k_shifted = k_shifted/k_shifted.sum()  # blur with shifted kernel
                # ndimage.convolve: the ndimage.filters namespace is deprecated.
                img = ndimage.convolve(img, np.expand_dims(k_shifted, axis=2), mode='mirror')
                img = img[0::sf, 0::sf, ...]  # nearest downsampling
            img = np.clip(img, 0.0, 1.0)

        elif i == 3:
            # downsample3
            img = cv2.resize(img, (int(1/sf*a), int(1/sf*b)), interpolation=random.choice([1, 2, 3]))
            img = np.clip(img, 0.0, 1.0)

        elif i == 4:
            # add Gaussian noise
            img = add_gaussian_noise(img, noise_level1=2, noise_level2=25)

        elif i == 5:
            # add JPEG noise
            if random.random() < jpeg_prob:
                img = add_jpeg_noise(img)

        elif i == 6:
            # add processed camera sensor noise
            if random.random() < isp_prob and isp_model is not None:
                with torch.no_grad():
                    img, hq = isp_model.forward(img.copy(), hq)

    # add final JPEG compression noise
    img = add_jpeg_noise(img)

    # random crop
    img, hq = random_crop(img, hq, sf_ori, lq_patchsize)

    return img, hq
def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=False, lq_patchsize=64, isp_model=None):
    """
    This is an extended degradation model combining
    the degradation models of BSRGAN and Real-ESRGAN
    ----------
    img: HxWxC, [0, 1], its size should be larger than (lq_patchsize x sf) x (lq_patchsize x sf)
    sf: scale factor
    shuffle_prob: probability of fully shuffling the 13 degradation steps;
        otherwise only the two noise groups are shuffled locally
    use_sharp: sharpen the image before degradation
    lq_patchsize: side length of the returned low-quality patch
    isp_model: camera ISP model; when given, each ISP step runs with probability 0.1

    Returns
    -------
    img: low-quality patch, size: lq_patchsize x lq_patchsize x C, range: [0, 1]
    hq: corresponding high-quality patch, size: (lq_patchsize x sf) x (lq_patchsize x sf) x C, range: [0, 1]

    Raises
    ------
    ValueError: if the mod-cropped image is smaller than lq_patchsize * sf
        along either spatial axis.
    """
    h1, w1 = img.shape[:2]
    # Mod crop: rows are bounded by the height and columns by the width.
    img = img.copy()[:h1 - h1 % sf, :w1 - w1 % sf, ...]
    h, w = img.shape[:2]

    if h < lq_patchsize * sf or w < lq_patchsize * sf:
        raise ValueError(f'img size ({h1}x{w1}) is too small!')

    if use_sharp:
        img = add_sharpening(img)
    hq = img.copy()

    if random.random() < shuffle_prob:
        shuffle_order = random.sample(range(13), 13)
    else:
        shuffle_order = list(range(13))
        # local shuffle for noise, JPEG is always the last one
        shuffle_order[2:6] = random.sample(shuffle_order[2:6], len(range(2, 6)))
        shuffle_order[9:13] = random.sample(shuffle_order[9:13], len(range(9, 13)))

    poisson_prob, speckle_prob, isp_prob = 0.1, 0.1, 0.1

    for i in shuffle_order:
        # Steps 7..12 repeat steps 0..5 (blur, resize, Gaussian, Poisson,
        # speckle, ISP); step 6 is the single mid-pipeline JPEG pass.
        step = i % 7
        if step == 0:
            img = add_blur(img, sf=sf)
        elif step == 1:
            img = add_resize(img, sf=sf)
        elif step == 2:
            img = add_gaussian_noise(img, noise_level1=2, noise_level2=25)
        elif step == 3:
            if random.random() < poisson_prob:
                img = add_poisson_noise(img)
        elif step == 4:
            if random.random() < speckle_prob:
                img = add_speckle_noise(img)
        elif step == 5:
            if random.random() < isp_prob and isp_model is not None:
                with torch.no_grad():
                    img, hq = isp_model.forward(img.copy(), hq)
        else:  # step == 6
            img = add_jpeg_noise(img)

    # resize to desired size
    img = cv2.resize(img, (int(1/sf*hq.shape[1]), int(1/sf*hq.shape[0])), interpolation=random.choice([1, 2, 3]))

    # add final JPEG compression noise
    img = add_jpeg_noise(img)

    # random crop
    img, hq = random_crop(img, hq, sf, lq_patchsize)

    return img, hq
if __name__ == '__main__':
    # Demo: degrade 'utils/test.png' 20 times and save each LQ/HQ pair side
    # by side as '<index>.png'. Run as: utils/utils_blindsr.py
    sf = 4
    img = util.uint2single(util.imread_uint('utils/test.png', 3))

    for i in range(20):
        img_lq, img_hq = degradation_bsrgan(img, sf=sf, lq_patchsize=72)
        print(i)
        # Nearest-neighbour upscale (interpolation=0) so the LQ patch matches
        # the HQ patch size for concatenation.
        upscaled_size = (int(sf*img_lq.shape[1]), int(sf*img_lq.shape[0]))
        lq_nearest = cv2.resize(util.single2uint(img_lq), upscaled_size, interpolation=0)
        img_concat = np.concatenate([lq_nearest, util.single2uint(img_hq)], axis=1)
        util.imsave(img_concat, str(i)+'.png')

    # Variant using the extended model:
    # for i in range(10):
    #     img_lq, img_hq = degradation_bsrgan_plus(img, sf=sf, shuffle_prob=0.1, use_sharp=True, lq_patchsize=64)
    #     print(i)
    #     lq_nearest = cv2.resize(util.single2uint(img_lq), (int(sf*img_lq.shape[1]), int(sf*img_lq.shape[0])), interpolation=0)
    #     img_concat = np.concatenate([lq_nearest, util.single2uint(img_hq)], axis=1)
    #     util.imsave(img_concat, str(i)+'.png')