from typing import Union, Tuple, Literal, Optional

import torch
import torch.nn as nn
from diffusers import UNet2DConditionModel
from torch import Tensor
from tqdm import tqdm

from toolkit.config_modules import LoRMConfig

conv = nn.Conv2d
lin = nn.Linear

_size_2_t = Union[int, Tuple[int, int]]

# Valid extraction modes. These are string literals, not types, so Literal
# (not Union) is the correct construct here.
ExtractMode = Literal[
    'fixed',
    'threshold',
    'ratio',
    'quantile',
    'percentage'
]
LINEAR_MODULES = [
    'Linear',
    'LoRACompatibleLinear'
]
CONV_MODULES = [
    # 'Conv2d',
    # 'LoRACompatibleConv'
]

UNET_TARGET_REPLACE_MODULE = [
    "Transformer2DModel",
    # "ResnetBlock2D",
    "Downsample2D",
    "Upsample2D",
]
LORM_TARGET_REPLACE_MODULE = UNET_TARGET_REPLACE_MODULE

UNET_TARGET_REPLACE_NAME = [
    "conv_in",
    "conv_out",
    "time_embedding.linear_1",
    "time_embedding.linear_2",
]

UNET_MODULES_TO_AVOID = [
]

# Low-rank convolution: factors a Conv2d into a down (KxK) and an up (1x1) projection.
class LoRMCon2d(nn.Module):
    def __init__(
            self,
            in_channels: int,
            lorm_channels: int,
            out_channels: int,
            kernel_size: _size_2_t,
            stride: _size_2_t = 1,
            padding: Union[str, _size_2_t] = 'same',
            dilation: _size_2_t = 1,
            groups: int = 1,
            bias: bool = True,
            padding_mode: str = 'zeros',
            device=None,
            dtype=None
    ) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.lorm_channels = lorm_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.padding_mode = padding_mode
        self.down = nn.Conv2d(
            in_channels=in_channels,
            out_channels=lorm_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=False,
            padding_mode=padding_mode,
            device=device,
            dtype=dtype
        )
        # The up projection always uses a 1x1 kernel: extract_conv folds the full
        # KxK spatial kernel into the down projection, leaving only a channel mix.
        self.up = nn.Conv2d(
            in_channels=lorm_channels,
            out_channels=out_channels,
            kernel_size=(1, 1),
            stride=1,
            padding='same',
            dilation=1,
            groups=1,
            bias=bias,
            padding_mode='zeros',
            device=device,
            dtype=dtype
        )

    def forward(self, input: Tensor, *args, **kwargs) -> Tensor:
        x = input
        x = self.down(x)
        x = self.up(x)
        return x
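
# Hedged usage sketch (not part of the original module): shows the shape
# contract of LoRMCon2d. All sizes below are illustrative assumptions.
def _demo_lorm_con2d():
    m = LoRMCon2d(in_channels=64, lorm_channels=8, out_channels=128, kernel_size=3)
    x = torch.randn(2, 64, 32, 32)
    y = m(x)
    # 'same' padding with stride 1 preserves the spatial size
    assert y.shape == (2, 128, 32, 32)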

class LoRMLinear(nn.Module):
    def __init__(
            self,
            in_features: int,
            lorm_features: int,
            out_features: int,
            bias: bool = True,
            device=None,
            dtype=None
    ) -> None:
        super().__init__()
        self.in_features = in_features
        self.lorm_features = lorm_features
        self.out_features = out_features
        self.down = nn.Linear(
            in_features=in_features,
            out_features=lorm_features,
            bias=False,
            device=device,
            dtype=dtype
        )
        self.up = nn.Linear(
            in_features=lorm_features,
            out_features=out_features,
            bias=bias,
            device=device,
            dtype=dtype
        )

    def forward(self, input: Tensor, *args, **kwargs) -> Tensor:
        x = input
        x = self.down(x)
        x = self.up(x)
        return x
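
# Hedged example (illustrative, not from the original file): LoRMLinear replaces
# in*out weights with in*r + r*out (+ out bias), so the savings depend on rank r.
def _demo_lorm_linear_param_savings():
    full = nn.Linear(1280, 1280)        # 1280*1280 + 1280 parameters
    lorm = LoRMLinear(1280, 128, 1280)  # 1280*128 + 128*1280 + 1280 parameters
    full_n = sum(p.numel() for p in full.parameters())
    lorm_n = sum(p.numel() for p in lorm.parameters())
    assert lorm_n < full_n  # roughly 5x fewer parameters at rank 128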

def extract_conv(
        weight: Union[torch.Tensor, nn.Parameter],
        mode: ExtractMode = 'fixed',
        mode_param=0,
        device='cpu'
) -> Tuple[Tensor, Tensor, int, Tensor]:
    weight = weight.to(device)
    out_ch, in_ch, kernel_size, _ = weight.shape
    # Flatten the (in_ch, k, k) dims so the kernel can be factored with a 2D SVD
    U, S, Vh = torch.linalg.svd(weight.reshape(out_ch, -1))

    if mode == 'percentage':
        assert 0 <= mode_param <= 1  # must be a valid fraction of the original size
        original_params = out_ch * in_ch * kernel_size * kernel_size
        desired_params = mode_param * original_params
        # Solve desired_params = rank * (in_ch*k*k + out_ch) for the rank
        lora_rank = int(desired_params / (in_ch * kernel_size * kernel_size + out_ch))
    elif mode == 'fixed':
        lora_rank = mode_param
    elif mode == 'threshold':
        assert mode_param >= 0
        lora_rank = torch.sum(S > mode_param).item()
    elif mode == 'ratio':
        assert 1 >= mode_param >= 0
        min_s = torch.max(S) * mode_param
        lora_rank = torch.sum(S > min_s).item()
    elif mode == 'quantile' or mode == 'percentile':  # 'percentile' kept as an alias
        assert 1 >= mode_param >= 0
        s_cum = torch.cumsum(S, dim=0)
        min_cum_sum = mode_param * torch.sum(S)
        lora_rank = torch.sum(s_cum < min_cum_sum).item()
    else:
        raise NotImplementedError(
            'Extract mode should be "fixed", "threshold", "ratio", "quantile" or "percentage"'
        )
    lora_rank = max(1, lora_rank)
    lora_rank = min(out_ch, in_ch, lora_rank)
    if lora_rank >= out_ch / 2:
        # A rank above out_ch / 2 would not save parameters, so cap it
        lora_rank = int(out_ch / 2)
        print("determined rank is higher than it should be; capping at out_ch / 2")

    U = U[:, :lora_rank]
    S = S[:lora_rank]
    U = U @ torch.diag(S)
    Vh = Vh[:lora_rank, :]
    # Residual between the original kernel and its low-rank reconstruction
    diff = (weight - (U @ Vh).reshape(out_ch, in_ch, kernel_size, kernel_size)).detach()
    extract_weight_A = Vh.reshape(lora_rank, in_ch, kernel_size, kernel_size).detach()
    extract_weight_B = U.reshape(out_ch, lora_rank, 1, 1).detach()
    del U, S, Vh, weight
    return extract_weight_A, extract_weight_B, lora_rank, diff
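
# Hedged sketch (illustrative): round-trips a conv kernel through extract_conv
# and checks that B(1x1) @ A(KxK) plus the residual reproduces the original weight.
def _demo_extract_conv():
    w = torch.randn(64, 32, 3, 3)
    A, B, rank, diff = extract_conv(w, mode='fixed', mode_param=8)
    # reconstruct: (out_ch, rank) @ (rank, in_ch*k*k), then add the residual
    recon = (B.reshape(64, rank) @ A.reshape(rank, -1)).reshape(w.shape) + diff
    assert torch.allclose(recon, w, atol=1e-4)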

def extract_linear(
        weight: Union[torch.Tensor, nn.Parameter],
        mode: ExtractMode = 'fixed',
        mode_param=0,
        device='cpu',
) -> Tuple[Tensor, Tensor, int, Tensor]:
    weight = weight.to(device)
    out_ch, in_ch = weight.shape
    U, S, Vh = torch.linalg.svd(weight)

    if mode == 'percentage':
        assert 0 <= mode_param <= 1  # must be a valid fraction of the original size
        desired_params = mode_param * out_ch * in_ch
        # Solve desired_params = rank * (in_ch + out_ch) for the rank
        lora_rank = int(desired_params / (in_ch + out_ch))
    elif mode == 'fixed':
        lora_rank = mode_param
    elif mode == 'threshold':
        assert mode_param >= 0
        lora_rank = torch.sum(S > mode_param).item()
    elif mode == 'ratio':
        assert 1 >= mode_param >= 0
        min_s = torch.max(S) * mode_param
        lora_rank = torch.sum(S > min_s).item()
    elif mode == 'quantile':
        assert 1 >= mode_param >= 0
        s_cum = torch.cumsum(S, dim=0)
        min_cum_sum = mode_param * torch.sum(S)
        lora_rank = torch.sum(s_cum < min_cum_sum).item()
    else:
        raise NotImplementedError(
            'Extract mode should be "fixed", "threshold", "ratio", "quantile" or "percentage"'
        )
    lora_rank = max(1, lora_rank)
    lora_rank = min(out_ch, in_ch, lora_rank)
    if lora_rank >= out_ch / 2:
        # A rank above out_ch / 2 would not save parameters, so cap it
        lora_rank = int(out_ch / 2)

    U = U[:, :lora_rank]
    S = S[:lora_rank]
    U = U @ torch.diag(S)
    Vh = Vh[:lora_rank, :]
    # Residual between the original weight and its low-rank reconstruction
    diff = (weight - U @ Vh).detach()
    extract_weight_A = Vh.reshape(lora_rank, in_ch).detach()
    extract_weight_B = U.reshape(out_ch, lora_rank).detach()
    del U, S, Vh, weight
    return extract_weight_A, extract_weight_B, lora_rank, diff
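
# Hedged sketch (illustrative): extracts a low-rank factorization from an
# nn.Linear and loads it into a LoRMLinear. Since diff = W - B@A, the factored
# output plus x @ diff.T recovers the original output exactly (up to float error).
def _demo_extract_linear():
    src = nn.Linear(256, 256)
    A, B, rank, diff = extract_linear(src.weight.detach().float(), mode='ratio', mode_param=0.05)
    lorm = LoRMLinear(256, rank, 256)
    lorm.down.weight.data = A
    lorm.up.weight.data = B
    lorm.up.bias.data = src.bias.data.clone()
    x = torch.randn(4, 256)
    assert torch.allclose(lorm(x) + x @ diff.T, src(x), atol=1e-4)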

def replace_module_by_path(network, name, module):
    """Replace a module in a network by its dotted path name."""
    name_parts = name.split('.')
    current_module = network
    for part in name_parts[:-1]:
        current_module = getattr(current_module, part)
    try:
        setattr(current_module, name_parts[-1], module)
    except Exception as e:
        print(e)
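
# Hedged example (illustrative): swap a nested child by its dotted path, the same
# mechanism convert_diffusers_unet_to_lorm uses to splice in LoRM modules.
def _demo_replace_module_by_path():
    net = nn.Sequential(nn.Linear(8, 8), nn.Sequential(nn.Linear(8, 8)))
    replace_module_by_path(net, '1.0', nn.Identity())
    assert isinstance(net[1][0], nn.Identity)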

def count_parameters(module):
    return sum(p.numel() for p in module.parameters())

def compute_optimal_bias(original_module, linear_down, linear_up, X):
    # The mean residual over a sample batch is the bias that minimizes the mean
    # squared error of the low-rank approximation (referenced only by the
    # commented-out experiment in convert_diffusers_unet_to_lorm below).
    Y_original = original_module(X)
    Y_approx = linear_up(linear_down(X))
    E = Y_original - Y_approx
    optimal_bias = E.mean(dim=0)
    return optimal_bias
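
# Hedged sketch (illustrative): the returned correction shifts the factored
# module's output mean to match the original on the sampled batch.
def _demo_compute_optimal_bias():
    src = nn.Linear(16, 16)
    down, up = nn.Linear(16, 4, bias=False), nn.Linear(4, 16)
    X = torch.randn(512, 16)
    up.bias.data += compute_optimal_bias(src, down, up, X).detach()
    assert torch.allclose(src(X).mean(0), up(down(X)).mean(0), atol=1e-4)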

def format_with_commas(n):
    return f"{n:,}"

def print_lorm_extract_details(
        start_num_params: int,
        end_num_params: int,
        num_replaced: int,
):
    start_formatted = format_with_commas(start_num_params)
    end_formatted = format_with_commas(end_num_params)
    num_replaced_formatted = format_with_commas(num_replaced)
    # Pad to the widest value so the columns line up
    width = max(len(start_formatted), len(end_formatted), len(num_replaced_formatted))
    print("Convert UNet result:")
    print(f" - converted: {num_replaced:>{width},} modules")
    print(f" - start: {start_num_params:>{width},} params")
    print(f" - end: {end_num_params:>{width},} params")

# Module-level defaults; convert_diffusers_unet_to_lorm reads per-module values
# from its LoRMConfig instead.
lorm_ignore_if_contains = [
    'proj_out', 'proj_in',
]
lorm_parameter_threshold = 1_000_000

def convert_diffusers_unet_to_lorm(
        unet: UNet2DConditionModel,
        config: LoRMConfig,
):
    print('Converting UNet to LoRM UNet')
    start_num_params = count_parameters(unet)
    named_modules = list(unet.named_modules())

    num_replaced = 0
    pbar = tqdm(total=len(named_modules), desc="UNet -> LoRM UNet")
    layer_names_replaced = []
    converted_modules = []
    ignore_if_contains = [
        'proj_out', 'proj_in',
    ]
    for name, module in named_modules:
        module_name = module.__class__.__name__
        if module_name in UNET_TARGET_REPLACE_MODULE:
            for child_name, child_module in module.named_modules():
                new_module: Union[LoRMCon2d, LoRMLinear, None] = None
                combined_name = f"{name}.{child_name}"
                lorm_config = config.get_config_for_module(combined_name)
                extract_mode = lorm_config.extract_mode
                extract_mode_param = lorm_config.extract_mode_param
                parameter_threshold = lorm_config.parameter_threshold
                if any(word in child_name for word in ignore_if_contains):
                    pass
                elif child_module.__class__.__name__ in LINEAR_MODULES:
                    if count_parameters(child_module) > parameter_threshold:
                        # extraction runs in float32 for SVD stability
                        dtype = torch.float32
                        down_weight, up_weight, lora_dim, diff = extract_linear(
                            weight=child_module.weight.clone().detach().float(),
                            mode=extract_mode,
                            mode_param=extract_mode_param,
                            device=child_module.weight.device,
                        )
                        if down_weight is None:
                            continue
                        down_weight = down_weight.to(dtype=dtype)
                        up_weight = up_weight.to(dtype=dtype)
                        bias_weight = None
                        if child_module.bias is not None:
                            bias_weight = child_module.bias.data.clone().detach().to(dtype=dtype)
                        # linear layer weights are (out_features, in_features)
                        new_module = LoRMLinear(
                            in_features=down_weight.shape[1],
                            lorm_features=lora_dim,
                            out_features=up_weight.shape[0],
                            bias=bias_weight is not None,
                            device=down_weight.device,
                            dtype=down_weight.dtype
                        )
                        # replace the weights
                        new_module.down.weight.data = down_weight
                        new_module.up.weight.data = up_weight
                        if bias_weight is not None:
                            new_module.up.bias.data = bias_weight
                        # else:
                        #     new_module.up.bias.data = torch.zeros_like(new_module.up.bias.data)
                        #     bias_correction = compute_optimal_bias(
                        #         child_module,
                        #         new_module.down,
                        #         new_module.up,
                        #         torch.randn((1000, down_weight.shape[1])).to(device=down_weight.device, dtype=dtype)
                        #     )
                        #     new_module.up.bias.data += bias_correction
                elif child_module.__class__.__name__ in CONV_MODULES:
                    if count_parameters(child_module) > parameter_threshold:
                        dtype = child_module.weight.dtype
                        down_weight, up_weight, lora_dim, diff = extract_conv(
                            weight=child_module.weight.clone().detach().float(),
                            mode=extract_mode,
                            mode_param=extract_mode_param,
                            device=child_module.weight.device,
                        )
                        if down_weight is None:
                            continue
                        down_weight = down_weight.to(dtype=dtype)
                        up_weight = up_weight.to(dtype=dtype)
                        bias_weight = None
                        if child_module.bias is not None:
                            bias_weight = child_module.bias.data.clone().detach().to(dtype=dtype)
                        new_module = LoRMCon2d(
                            in_channels=down_weight.shape[1],
                            lorm_channels=lora_dim,
                            out_channels=up_weight.shape[0],
                            kernel_size=child_module.kernel_size,
                            dilation=child_module.dilation,
                            padding=child_module.padding,
                            padding_mode=child_module.padding_mode,
                            stride=child_module.stride,
                            bias=bias_weight is not None,
                            device=down_weight.device,
                            dtype=down_weight.dtype
                        )
                        # replace the weights
                        new_module.down.weight.data = down_weight
                        new_module.up.weight.data = up_weight
                        if bias_weight is not None:
                            new_module.up.bias.data = bias_weight

                if new_module is not None:
                    combined_name = f"{name}.{child_name}"
                    replace_module_by_path(unet, combined_name, new_module)
                    converted_modules.append(new_module)
                    num_replaced += 1
                    # log the parameter count of the module that was replaced
                    layer_names_replaced.append(
                        f"{combined_name} - {format_with_commas(count_parameters(child_module))}")
        pbar.update(1)
    pbar.close()

    end_num_params = count_parameters(unet)

    def sorting_key(s):
        # Extract the number part, remove commas, and convert to integer
        return int(s.split("-")[1].strip().replace(",", ""))

    sorted_layer_names_replaced = sorted(layer_names_replaced, key=sorting_key, reverse=True)
    for layer_name in sorted_layer_names_replaced:
        print(layer_name)

    print_lorm_extract_details(
        start_num_params=start_num_params,
        end_num_params=end_num_params,
        num_replaced=num_replaced,
    )
    return converted_modules
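
# Hedged usage sketch (assumptions flagged): loading a Stable Diffusion UNet and
# converting it in place. The zero-argument LoRMConfig() call is hypothetical;
# consult toolkit.config_modules for the real constructor and its fields.
def _demo_convert_unet():
    unet = UNet2DConditionModel.from_pretrained(
        "runwayml/stable-diffusion-v1-5", subfolder="unet"
    )
    config = LoRMConfig()  # hypothetical defaults; may need extract_mode etc.
    converted = convert_diffusers_unet_to_lorm(unet, config)
    print(f"replaced {len(converted)} modules")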