import copy
import logging
import uuid

import torch

from modules.NeuralNetwork import unet
from modules.Utilities import util
from modules.Device import Device
def wipe_lowvram_weight(m: torch.nn.Module) -> None:
    """#### Restore a module that was previously put into low-VRAM cast mode."""
    if hasattr(m, "prev_comfy_cast_weights"):
        m.comfy_cast_weights = m.prev_comfy_cast_weights
        del m.prev_comfy_cast_weights
    m.weight_function = None
    m.bias_function = None
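
# A minimal sketch of the low-VRAM round trip this helper undoes, assuming a
# module flagged by the lowvram patching paths further down in this file:
#
#     m.prev_comfy_cast_weights = m.comfy_cast_weights  # save the original flag
#     m.comfy_cast_weights = True                       # cast weights on the fly
#     ...
#     wipe_lowvram_weight(m)                            # restore the original state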

class ModelPatcher:
    def __init__(
        self,
        model: torch.nn.Module,
        load_device: torch.device,
        offload_device: torch.device,
        size: int = 0,
        current_device: torch.device = None,
        weight_inplace_update: bool = False,
    ):
        """#### Initialize the ModelPatcher class.
        #### Args:
            - `model` (torch.nn.Module): The model.
            - `load_device` (torch.device): The device to load the model on.
            - `offload_device` (torch.device): The device to offload the model to.
            - `size` (int, optional): The size of the model. Defaults to 0.
            - `current_device` (torch.device, optional): The current device. Defaults to None.
            - `weight_inplace_update` (bool, optional): Whether to update weights in place. Defaults to False.
        """
        self.size = size
        self.model = model
        self.patches = {}
        self.backup = {}
        self.object_patches = {}
        self.object_patches_backup = {}
        self.model_options = {"transformer_options": {}}
        self.model_size()  # caches the size and the set of state-dict keys
        self.load_device = load_device
        self.offload_device = offload_device
        if current_device is None:
            self.current_device = self.offload_device
        else:
            self.current_device = current_device

        self.weight_inplace_update = weight_inplace_update
        self.model_lowvram = False
        self.lowvram_patch_counter = 0
        self.patches_uuid = uuid.uuid4()

        if not hasattr(self.model, "model_loaded_weight_memory"):
            self.model.model_loaded_weight_memory = 0
        if not hasattr(self.model, "model_lowvram"):
            self.model.model_lowvram = False
        if not hasattr(self.model, "lowvram_patch_counter"):
            self.model.lowvram_patch_counter = 0
    def loaded_size(self) -> int:
        """#### Get the loaded size.
        #### Returns:
            - `int`: The loaded size.
        """
        return self.model.model_loaded_weight_memory

    def model_size(self) -> int:
        """#### Get the size of the model.
        #### Returns:
            - `int`: The size of the model.
        """
        if self.size > 0:
            return self.size
        model_sd = self.model.state_dict()
        self.size = Device.module_size(self.model)
        self.model_keys = set(model_sd.keys())
        return self.size
    def clone(self) -> "ModelPatcher":
        """#### Clone the ModelPatcher object.
        #### Returns:
            - `ModelPatcher`: The cloned ModelPatcher object.
        """
        n = ModelPatcher(
            self.model,
            self.load_device,
            self.offload_device,
            self.size,
            self.current_device,
            weight_inplace_update=self.weight_inplace_update,
        )
        n.patches = {}
        for k in self.patches:
            n.patches[k] = self.patches[k][:]  # shallow-copy each patch list
        n.patches_uuid = self.patches_uuid
        n.object_patches = self.object_patches.copy()
        n.model_options = copy.deepcopy(self.model_options)
        n.model_keys = self.model_keys
        # Backups are shared, not copied: clones wrap the same underlying model,
        # so restoring original weights must go through the same backup store.
        n.backup = self.backup
        n.object_patches_backup = self.object_patches_backup
        return n
    def is_clone(self, other: object) -> bool:
        """#### Check whether another patcher wraps the same underlying model.
        #### Args:
            - `other` (object): The other object.
        #### Returns:
            - `bool`: Whether the object is a clone.
        """
        return hasattr(other, "model") and self.model is other.model

    def memory_required(self, input_shape: tuple) -> float:
        """#### Calculate the memory required for the model.
        #### Args:
            - `input_shape` (tuple): The input shape.
        #### Returns:
            - `float`: The memory required.
        """
        return self.model.memory_required(input_shape=input_shape)
    def set_model_unet_function_wrapper(self, unet_wrapper_function: callable) -> None:
        """#### Set the UNet function wrapper for the model.
        #### Args:
            - `unet_wrapper_function` (callable): The UNet function wrapper.
        """
        self.model_options["model_function_wrapper"] = unet_wrapper_function

    def set_model_denoise_mask_function(self, denoise_mask_function: callable) -> None:
        """#### Set the denoise mask function for the model.
        #### Args:
            - `denoise_mask_function` (callable): The denoise mask function.
        """
        self.model_options["denoise_mask_function"] = denoise_mask_function
    def get_model_object(self, name: str) -> object:
        """#### Get an object from the model.
        #### Args:
            - `name` (str): The name of the object.
        #### Returns:
            - `object`: The object.
        """
        return util.get_attr(self.model, name)

    def model_patches_to(self, device: torch.device) -> None:
        """#### Move model patches to a device.
        #### Args:
            - `device` (torch.device): The device.
        """
        if "model_function_wrapper" in self.model_options:
            wrap_func = self.model_options["model_function_wrapper"]
            if hasattr(wrap_func, "to"):
                self.model_options["model_function_wrapper"] = wrap_func.to(device)

    def model_dtype(self) -> torch.dtype:
        """#### Get the data type of the model.
        #### Returns:
            - `torch.dtype`: The data type, or None if the model does not expose one.
        """
        if hasattr(self.model, "get_dtype"):
            return self.model.get_dtype()
    def add_patches(
        self, patches: dict, strength_patch: float = 1.0, strength_model: float = 1.0
    ) -> list:
        """#### Add patches to the model.
        #### Args:
            - `patches` (dict): The patches to add.
            - `strength_patch` (float, optional): The strength of the patches. Defaults to 1.0.
            - `strength_model` (float, optional): The strength of the model. Defaults to 1.0.
        #### Returns:
            - `list`: The list of patched keys.
        """
        p = set()
        for k in patches:
            if k in self.model_keys:
                p.add(k)
                current_patches = self.patches.get(k, [])
                # Each entry is a (strength_patch, patch_value, strength_model) tuple.
                current_patches.append((strength_patch, patches[k], strength_model))
                self.patches[k] = current_patches

        self.patches_uuid = uuid.uuid4()
        return list(p)
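
    # A minimal sketch of how patches are typically registered, assuming a
    # LoRA-style payload (the key and tensor names below are hypothetical, not
    # part of this module):
    #
    #     lora_patches = {
    #         "diffusion_model.attn.to_q.weight": ("lora", (up, down, alpha, None, None)),
    #     }
    #     patched_keys = patcher.add_patches(lora_patches, strength_patch=0.8)
    #
    # Keys absent from the wrapped model's state dict are silently skipped, so
    # `patched_keys` reports which patches will actually take effect.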
    def set_model_patch(self, patch: list, name: str):
        """#### Set a patch for the model.
        #### Args:
            - `patch` (list): The patch.
            - `name` (str): The name of the patch.
        """
        to = self.model_options["transformer_options"]
        if "patches" not in to:
            to["patches"] = {}
        to["patches"][name] = to["patches"].get(name, []) + [patch]

    def set_model_attn1_patch(self, patch: list):
        """#### Set the attention 1 patch for the model.
        #### Args:
            - `patch` (list): The patch.
        """
        self.set_model_patch(patch, "attn1_patch")

    def set_model_attn2_patch(self, patch: list):
        """#### Set the attention 2 patch for the model.
        #### Args:
            - `patch` (list): The patch.
        """
        self.set_model_patch(patch, "attn2_patch")

    def set_model_attn1_output_patch(self, patch: list):
        """#### Set the attention 1 output patch for the model.
        #### Args:
            - `patch` (list): The patch.
        """
        self.set_model_patch(patch, "attn1_output_patch")

    def set_model_attn2_output_patch(self, patch: list):
        """#### Set the attention 2 output patch for the model.
        #### Args:
            - `patch` (list): The patch.
        """
        self.set_model_patch(patch, "attn2_output_patch")
    def model_state_dict(self, filter_prefix: str = None) -> dict:
        """#### Get the state dictionary of the model.
        #### Args:
            - `filter_prefix` (str, optional): Only keep keys starting with this prefix. Defaults to None.
        #### Returns:
            - `dict`: The state dictionary.
        """
        sd = self.model.state_dict()
        if filter_prefix is not None:
            sd = {k: v for k, v in sd.items() if k.startswith(filter_prefix)}
        return sd
    def patch_weight_to_device(self, key: str, device_to: torch.device = None) -> None:
        """#### Patch the weight of a key to a device.
        #### Args:
            - `key` (str): The key.
            - `device_to` (torch.device, optional): The device to patch to. Defaults to None.
        """
        if key not in self.patches:
            return

        weight = util.get_attr(self.model, key)

        inplace_update = self.weight_inplace_update
        # Keep a pristine copy on the offload device so unpatch_model can restore it.
        if key not in self.backup:
            self.backup[key] = weight.to(
                device=self.offload_device, copy=inplace_update
            )

        if device_to is not None:
            temp_weight = Device.cast_to_device(
                weight, device_to, torch.float32, copy=True
            )
        else:
            temp_weight = weight.to(torch.float32, copy=True)
        # Apply all registered patches in float32, then cast back to the original dtype.
        out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(
            weight.dtype
        )
        if inplace_update:
            util.copy_to_param(self.model, key, out_weight)
        else:
            util.set_attr_param(self.model, key, out_weight)
    def load(
        self,
        device_to: torch.device = None,
        lowvram_model_memory: int = 0,
        force_patch_weights: bool = False,
        full_load: bool = False,
    ):
        """#### Load the model.
        #### Args:
            - `device_to` (torch.device, optional): The device to load to. Defaults to None.
            - `lowvram_model_memory` (int, optional): The low VRAM model memory. Defaults to 0.
            - `force_patch_weights` (bool, optional): Whether to force patch weights. Defaults to False.
            - `full_load` (bool, optional): Whether to fully load the model. Defaults to False.
        """
        mem_counter = 0
        patch_counter = 0
        lowvram_counter = 0
        loading = []
        for n, m in self.model.named_modules():
            if hasattr(m, "comfy_cast_weights") or hasattr(m, "weight"):
                loading.append((Device.module_size(m), n, m))

        load_completely = []
        loading.sort(reverse=True)  # largest modules first
        for module_mem, n, m in loading:
            lowvram_weight = False

            if not full_load and hasattr(m, "comfy_cast_weights"):
                if mem_counter + module_mem >= lowvram_model_memory:
                    lowvram_weight = True
                    lowvram_counter += 1
                    if hasattr(m, "prev_comfy_cast_weights"):  # already lowvramed
                        continue

            weight_key = "{}.weight".format(n)
            bias_key = "{}.bias".format(n)

            if lowvram_weight:
                # Over the memory budget: leave this module's weights where they
                # are and cast them on the fly at execution time.
                if weight_key in self.patches:
                    if force_patch_weights:
                        self.patch_weight_to_device(weight_key)
                if bias_key in self.patches:
                    if force_patch_weights:
                        self.patch_weight_to_device(bias_key)

                m.prev_comfy_cast_weights = m.comfy_cast_weights
                m.comfy_cast_weights = True
            else:
                if hasattr(m, "comfy_cast_weights"):
                    if m.comfy_cast_weights:
                        wipe_lowvram_weight(m)

                if hasattr(m, "weight"):
                    mem_counter += module_mem
                    load_completely.append((module_mem, n, m))

        load_completely.sort(reverse=True)
        for module_mem, n, m in load_completely:
            weight_key = "{}.weight".format(n)
            bias_key = "{}.bias".format(n)
            if hasattr(m, "comfy_patched_weights"):
                if m.comfy_patched_weights is True:
                    continue

            self.patch_weight_to_device(weight_key, device_to=device_to)
            self.patch_weight_to_device(bias_key, device_to=device_to)
            logging.debug("lowvram: loaded module regularly {} {}".format(n, m))
            m.comfy_patched_weights = True

        for x in load_completely:
            x[2].to(device_to)

        if lowvram_counter > 0:
            logging.info(
                "loaded partially {} {} {}".format(
                    lowvram_model_memory / (1024 * 1024),
                    mem_counter / (1024 * 1024),
                    patch_counter,
                )
            )
            self.model.model_lowvram = True
        else:
            logging.info(
                "loaded completely {} {} {}".format(
                    lowvram_model_memory / (1024 * 1024),
                    mem_counter / (1024 * 1024),
                    full_load,
                )
            )
            self.model.model_lowvram = False
            if full_load:
                self.model.to(device_to)
                mem_counter = self.model_size()

        self.model.lowvram_patch_counter += patch_counter
        self.model.device = device_to
        self.model.model_loaded_weight_memory = mem_counter
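
    # A rough sketch of the budget semantics, assuming sizes are in bytes (the
    # device and numbers below are illustrative only):
    #
    #     patcher.load(torch.device("cuda:0"), full_load=True)
    #         -> every module is patched and moved to the GPU
    #     patcher.load(torch.device("cuda:0"), lowvram_model_memory=2 * 1024**3)
    #         -> modules are loaded largest-first until ~2 GiB is accounted for;
    #            the rest stay in cast-on-the-fly low-VRAM mode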
    def patch_model_flux(
        self,
        device_to: torch.device = None,
        lowvram_model_memory: int = 0,
        load_weights: bool = True,
        force_patch_weights: bool = False,
    ):
        """#### Patch the model (Flux variant): apply object patches, then load weights.
        #### Args:
            - `device_to` (torch.device, optional): The device to patch to. Defaults to None.
            - `lowvram_model_memory` (int, optional): The low VRAM model memory. Defaults to 0.
            - `load_weights` (bool, optional): Whether to load weights. Defaults to True.
            - `force_patch_weights` (bool, optional): Whether to force patch weights. Defaults to False.
        #### Returns:
            - `torch.nn.Module`: The patched model.
        """
        for k in self.object_patches:
            old = util.set_attr(self.model, k, self.object_patches[k])
            if k not in self.object_patches_backup:
                self.object_patches_backup[k] = old

        # A memory budget of 0 means "no budget": load everything.
        full_load = lowvram_model_memory == 0
        if load_weights:
            self.load(
                device_to,
                lowvram_model_memory=lowvram_model_memory,
                force_patch_weights=force_patch_weights,
                full_load=full_load,
            )
        return self.model
    def patch_model_lowvram_flux(
        self,
        device_to: torch.device = None,
        lowvram_model_memory: int = 0,
        force_patch_weights: bool = False,
    ) -> torch.nn.Module:
        """#### Patch the model for low VRAM (Flux variant).
        #### Args:
            - `device_to` (torch.device, optional): The device to patch to. Defaults to None.
            - `lowvram_model_memory` (int, optional): The low VRAM model memory. Defaults to 0.
            - `force_patch_weights` (bool, optional): Whether to force patch weights. Defaults to False.
        #### Returns:
            - `torch.nn.Module`: The patched model.
        """
        self.patch_model(device_to)
        logging.info(
            "loading in lowvram mode {}".format(lowvram_model_memory / (1024 * 1024))
        )

        class LowVramPatch:
            """Deferred patch: applies `calculate_weight` at cast time instead of load time."""

            def __init__(self, key: str, model_patcher: "ModelPatcher"):
                self.key = key
                self.model_patcher = model_patcher

            def __call__(self, weight: torch.Tensor) -> torch.Tensor:
                return self.model_patcher.calculate_weight(
                    self.model_patcher.patches[self.key], weight, self.key
                )

        mem_counter = 0
        patch_counter = 0
        for n, m in self.model.named_modules():
            lowvram_weight = False
            if hasattr(m, "comfy_cast_weights"):
                module_mem = Device.module_size(m)
                if mem_counter + module_mem >= lowvram_model_memory:
                    lowvram_weight = True

            weight_key = "{}.weight".format(n)
            bias_key = "{}.bias".format(n)

            if lowvram_weight:
                if weight_key in self.patches:
                    if force_patch_weights:
                        self.patch_weight_to_device(weight_key)
                    else:
                        m.weight_function = LowVramPatch(weight_key, self)
                        patch_counter += 1
                if bias_key in self.patches:
                    if force_patch_weights:
                        self.patch_weight_to_device(bias_key)
                    else:
                        m.bias_function = LowVramPatch(bias_key, self)
                        patch_counter += 1

                m.prev_comfy_cast_weights = m.comfy_cast_weights
                m.comfy_cast_weights = True
            else:
                if hasattr(m, "weight"):
                    self.patch_weight_to_device(weight_key, device_to)
                    self.patch_weight_to_device(bias_key, device_to)
                    m.to(device_to)
                    mem_counter += Device.module_size(m)
                    logging.debug("lowvram: loaded module regularly {}".format(m))

        self.model_lowvram = True
        self.lowvram_patch_counter = patch_counter
        return self.model
    def patch_model(
        self, device_to: torch.device = None, patch_weights: bool = True
    ) -> torch.nn.Module:
        """#### Patch the model.
        #### Args:
            - `device_to` (torch.device, optional): The device to patch to. Defaults to None.
            - `patch_weights` (bool, optional): Whether to patch weights. Defaults to True.
        #### Returns:
            - `torch.nn.Module`: The patched model.
        """
        for k in self.object_patches:
            old = util.set_attr(self.model, k, self.object_patches[k])
            if k not in self.object_patches_backup:
                self.object_patches_backup[k] = old

        if patch_weights:
            model_sd = self.model_state_dict()
            for key in self.patches:
                if key not in model_sd:
                    logging.warning(
                        "could not patch. key doesn't exist in model: {}".format(key)
                    )
                    continue
                self.patch_weight_to_device(key, device_to)

            if device_to is not None:
                self.model.to(device_to)
                self.current_device = device_to

        return self.model
    def patch_model_lowvram(
        self,
        device_to: torch.device = None,
        lowvram_model_memory: int = 0,
        force_patch_weights: bool = False,
    ) -> torch.nn.Module:
        """#### Patch the model for low VRAM.
        #### Args:
            - `device_to` (torch.device, optional): The device to patch to. Defaults to None.
            - `lowvram_model_memory` (int, optional): The low VRAM model memory. Defaults to 0.
            - `force_patch_weights` (bool, optional): Whether to force patch weights. Defaults to False.
        #### Returns:
            - `torch.nn.Module`: The patched model.
        """
        self.patch_model(device_to, patch_weights=False)
        logging.info(
            "loading in lowvram mode {}".format(lowvram_model_memory / (1024 * 1024))
        )

        class LowVramPatch:
            """Deferred patch: applies `calculate_weight` at cast time instead of load time."""

            def __init__(self, key: str, model_patcher: "ModelPatcher"):
                self.key = key
                self.model_patcher = model_patcher

            def __call__(self, weight: torch.Tensor) -> torch.Tensor:
                return self.model_patcher.calculate_weight(
                    self.model_patcher.patches[self.key], weight, self.key
                )

        mem_counter = 0
        patch_counter = 0
        for n, m in self.model.named_modules():
            lowvram_weight = False
            if hasattr(m, "comfy_cast_weights"):
                module_mem = Device.module_size(m)
                if mem_counter + module_mem >= lowvram_model_memory:
                    lowvram_weight = True

            weight_key = "{}.weight".format(n)
            bias_key = "{}.bias".format(n)

            if lowvram_weight:
                if weight_key in self.patches:
                    if force_patch_weights:
                        self.patch_weight_to_device(weight_key)
                    else:
                        m.weight_function = LowVramPatch(weight_key, self)
                        patch_counter += 1
                if bias_key in self.patches:
                    if force_patch_weights:
                        self.patch_weight_to_device(bias_key)
                    else:
                        m.bias_function = LowVramPatch(bias_key, self)
                        patch_counter += 1

                m.prev_comfy_cast_weights = m.comfy_cast_weights
                m.comfy_cast_weights = True
            else:
                if hasattr(m, "weight"):
                    self.patch_weight_to_device(weight_key, device_to)
                    self.patch_weight_to_device(bias_key, device_to)
                    m.to(device_to)
                    mem_counter += Device.module_size(m)
                    logging.debug("lowvram: loaded module regularly {}".format(m))

        self.model_lowvram = True
        self.lowvram_patch_counter = patch_counter
        return self.model
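
    # A minimal usage sketch, assuming a CUDA device and a ~1 GiB weight budget
    # (the numbers are illustrative only):
    #
    #     model = patcher.patch_model_lowvram(
    #         torch.device("cuda:0"), lowvram_model_memory=1024 * 1024 * 1024
    #     )
    #
    # Modules that fit in the budget get their patches baked in and are moved to
    # the GPU; the rest keep their original weights and apply patches lazily
    # through LowVramPatch each time the weight is cast for execution.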
    def calculate_weight(
        self, patches: list, weight: torch.Tensor, key: str
    ) -> torch.Tensor:
        """#### Apply the registered (LoRA-style) patches to a weight tensor.
        #### Args:
            - `patches` (list): The list of patches.
            - `weight` (torch.Tensor): The weight tensor.
            - `key` (str): The key.
        #### Returns:
            - `torch.Tensor`: The calculated weight.
        """
        for p in patches:
            alpha = p[0]  # patch strength; the model strength (p[2]) is unused here
            v = p[1][1]  # LoRA payload: (up, down, alpha, ...)

            mat1 = Device.cast_to_device(v[0], weight.device, torch.float32)
            mat2 = Device.cast_to_device(v[1], weight.device, torch.float32)
            if v[2] is not None:
                # Scale by the LoRA alpha divided by the rank (mat2's first dim).
                alpha *= v[2] / mat2.shape[0]
            weight += (
                (alpha * torch.mm(mat1.flatten(start_dim=1), mat2.flatten(start_dim=1)))
                .reshape(weight.shape)
                .type(weight.dtype)
            )
        return weight
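
    # In math terms this is the standard LoRA update, sketched here for a plain
    # 2-D weight (the flatten calls above extend it to conv weights):
    #
    #     W' = W + s * (alpha / r) * (up @ down)
    #
    # where `up` is (out, r), `down` is (r, in), `r` is the LoRA rank, and `s`
    # is the per-patch strength passed to add_patches.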
    def unpatch_model(
        self, device_to: torch.device = None, unpatch_weights: bool = True
    ) -> None:
        """#### Unpatch the model.
        #### Args:
            - `device_to` (torch.device, optional): The device to unpatch to. Defaults to None.
            - `unpatch_weights` (bool, optional): Whether to unpatch weights. Defaults to True.
        """
        if unpatch_weights:
            # Restore the pristine weights saved by patch_weight_to_device.
            keys = list(self.backup.keys())
            for k in keys:
                util.set_attr_param(self.model, k, self.backup[k])
            self.backup.clear()
            if device_to is not None:
                self.model.to(device_to)
                self.current_device = device_to

        # Restore any objects swapped in by add_object_patch.
        keys = list(self.object_patches_backup.keys())
        for k in keys:
            util.set_attr(self.model, k, self.object_patches_backup[k])
        self.object_patches_backup.clear()
    def partially_load(self, device_to: torch.device, extra_memory: int = 0) -> int:
        """#### Partially load the model.
        #### Args:
            - `device_to` (torch.device): The device to load to.
            - `extra_memory` (int, optional): The extra memory. Defaults to 0.
        #### Returns:
            - `int`: The additional memory loaded.
        """
        self.unpatch_model(unpatch_weights=False)
        self.patch_model(patch_weights=False)
        full_load = False
        if self.model.model_lowvram is False:
            return 0  # already fully loaded; nothing more to do
        if self.model.model_loaded_weight_memory + extra_memory > self.model_size():
            full_load = True
        current_used = self.model.model_loaded_weight_memory
        self.load(
            device_to,
            lowvram_model_memory=current_used + extra_memory,
            full_load=full_load,
        )
        return self.model.model_loaded_weight_memory - current_used

    def add_object_patch(self, name: str, obj: object) -> None:
        """#### Register an object to be swapped into the model at patch time.
        #### Args:
            - `name` (str): The attribute path of the object to replace.
            - `obj` (object): The replacement object.
        """
        self.object_patches[name] = obj
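
    # A minimal sketch of object patching, assuming `custom_block` is some
    # drop-in replacement module (the attribute path below is hypothetical):
    #
    #     patcher.add_object_patch("diffusion_model.middle_block", custom_block)
    #     patcher.patch_model()    # swaps the object in and backs up the original
    #     patcher.unpatch_model()  # puts the original object back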

def unet_prefix_from_state_dict(state_dict: dict) -> str:
    """#### Get the UNet prefix from the state dictionary.
    #### Args:
        - `state_dict` (dict): The state dictionary.
    #### Returns:
        - `str`: The UNet prefix.
    """
    candidates = [
        "model.diffusion_model.",  # ldm/sgm models
        "model.model.",  # audio models
    ]
    counts = {k: 0 for k in candidates}
    for k in state_dict:
        for c in candidates:
            if k.startswith(c):
                counts[c] += 1
                break

    top = max(counts, key=counts.get)
    # Require a handful of matching keys before trusting a candidate prefix.
    if counts[top] > 5:
        return top
    else:
        return "model."  # aura flow and others

def load_diffusion_model_state_dict(sd, model_options={}) -> ModelPatcher:
    """#### Load the diffusion model state dictionary.
    #### Args:
        - `sd`: The state dictionary.
        - `model_options` (dict, optional): The model options. Defaults to {}.
    #### Returns:
        - `ModelPatcher`: The model patcher, or None if no model config was detected.
    """
    # Load a UNet in diffusers or regular format.
    dtype = model_options.get("dtype", None)

    # Allow loading unets from checkpoint files: strip the detected prefix.
    diffusion_model_prefix = unet_prefix_from_state_dict(sd)
    temp_sd = util.state_dict_prefix_replace(
        sd, {diffusion_model_prefix: ""}, filter_keys=True
    )
    if len(temp_sd) > 0:
        sd = temp_sd

    parameters = util.calculate_parameters(sd)
    load_device = Device.get_torch_device()
    model_config = unet.model_config_from_unet(sd, "")

    if model_config is None:
        # No recognizable UNet layout: nothing sensible can be built.
        return None

    new_sd = sd
    offload_device = Device.unet_offload_device()
    if dtype is None:
        unet_dtype2 = Device.unet_dtype(
            model_params=parameters,
            supported_dtypes=model_config.supported_inference_dtypes,
        )
    else:
        unet_dtype2 = dtype

    manual_cast_dtype = Device.unet_manual_cast(
        unet_dtype2, load_device, model_config.supported_inference_dtypes
    )
    model_config.set_inference_dtype(unet_dtype2, manual_cast_dtype)
    model_config.custom_operations = model_options.get(
        "custom_operations", model_config.custom_operations
    )
    model = model_config.get_model(new_sd, "")
    model = model.to(offload_device)
    model.load_model_weights(new_sd, "")
    left_over = sd.keys()
    if len(left_over) > 0:
        logging.info("left over keys in unet: {}".format(left_over))
    return ModelPatcher(model, load_device=load_device, offload_device=offload_device)
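
# A minimal end-to-end sketch, assuming a standalone UNet checkpoint on disk
# (the path and the `lora_patches` dict are hypothetical):
#
#     sd = torch.load("path/to/unet.ckpt", map_location="cpu")
#     patcher = load_diffusion_model_state_dict(sd)
#     if patcher is not None:
#         patcher.add_patches(lora_patches, strength_patch=0.8)
#         model = patcher.patch_model(Device.get_torch_device())
#         ...  # run inference
#         patcher.unpatch_model(patcher.offload_device)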