import logging
import math

import torch

from modules.Utilities import Latent
from modules.Device import Device
from modules.NeuralNetwork import unet
from modules.cond import cast, cond
from modules.sample import sampling


class BaseModel(torch.nn.Module):
    """#### Base class for models."""

    def __init__(
        self,
        model_config: object,
        model_type: sampling.ModelType = sampling.ModelType.EPS,
        device: torch.device = None,
        unet_model: object = unet.UNetModel1,
        flux: bool = False,
    ):
"""#### Initialize the BaseModel class. | |
#### Args: | |
- `model_config` (object): The model configuration. | |
- `model_type` (sampling.ModelType, optional): The model type. Defaults to sampling.ModelType.EPS. | |
- `device` (torch.device, optional): The device to use. Defaults to None. | |
- `unet_model` (object, optional): The UNet model. Defaults to unet.UNetModel1. | |
""" | |
        super().__init__()
        unet_config = model_config.unet_config
        self.latent_format = model_config.latent_format
        self.model_config = model_config
        self.manual_cast_dtype = model_config.manual_cast_dtype
        self.device = device

        if flux:
            if not unet_config.get("disable_unet_model_creation", False):
                operations = model_config.custom_operations
                self.diffusion_model = unet_model(
                    **unet_config, device=device, operations=operations
                )
                logging.info(
                    "model weight dtype {}, manual cast: {}".format(
                        self.get_dtype(), self.manual_cast_dtype
                    )
                )
        else:
            if not unet_config.get("disable_unet_model_creation", False):
                if self.manual_cast_dtype is not None:
                    operations = cast.manual_cast
                else:
                    operations = cast.disable_weight_init
                self.diffusion_model = unet_model(
                    **unet_config, device=device, operations=operations
                )

        self.model_type = model_type
        self.model_sampling = sampling.model_sampling(
            model_config, model_type, flux=flux
        )

        self.adm_channels = unet_config.get("adm_in_channels", None)
        if self.adm_channels is None:
            self.adm_channels = 0
        self.concat_keys = ()
        logging.info("model_type {}".format(model_type.name))
        logging.debug("adm {}".format(self.adm_channels))
        self.memory_usage_factor = model_config.memory_usage_factor if flux else 2.0

    def apply_model(
        self,
        x: torch.Tensor,
        t: torch.Tensor,
        c_concat: torch.Tensor = None,
        c_crossattn: torch.Tensor = None,
        control: torch.Tensor = None,
        transformer_options: dict = {},
        **kwargs,
    ) -> torch.Tensor:
"""#### Apply the model to the input tensor. | |
#### Args: | |
- `x` (torch.Tensor): The input tensor. | |
- `t` (torch.Tensor): The timestep tensor. | |
- `c_concat` (torch.Tensor, optional): The concatenated condition tensor. Defaults to None. | |
- `c_crossattn` (torch.Tensor, optional): The cross-attention condition tensor. Defaults to None. | |
- `control` (torch.Tensor, optional): The control tensor. Defaults to None. | |
- `transformer_options` (dict, optional): The transformer options. Defaults to {}. | |
- `**kwargs`: Additional keyword arguments. | |
#### Returns: | |
- `torch.Tensor`: The output tensor. | |
""" | |
        sigma = t
        xc = self.model_sampling.calculate_input(sigma, x)

        # Concatenate extra channel conditions (e.g. inpainting masks) directly
        if c_concat is not None:
            xc = torch.cat((xc, c_concat), dim=1)

        # Determine dtype once to avoid repeated calls to get_dtype()
        dtype = (
            self.manual_cast_dtype
            if self.manual_cast_dtype is not None
            else self.get_dtype()
        )

        # Batch the casts to reduce overhead
        xc = xc.to(dtype)
        t = self.model_sampling.timestep(t).float()
        context = c_crossattn.to(dtype) if c_crossattn is not None else None

        # Cast extra conditions, leaving integer tensors (e.g. indices) untouched
        extra_conds = {}
        for name, value in kwargs.items():
            if hasattr(value, "dtype") and value.dtype not in (torch.int, torch.long):
                extra_conds[name] = value.to(dtype)
            else:
                extra_conds[name] = value

        # Run the diffusion model, then convert its output to a denoised prediction
        model_output = self.diffusion_model(
            xc,
            t,
            context=context,
            control=control,
            transformer_options=transformer_options,
            **extra_conds,
        ).float()
        return self.model_sampling.calculate_denoised(sigma, model_output, x)
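
    # Usage sketch (hypothetical shapes; assumes a constructed model `m` with
    # SD-style 4-channel latents). Note that `t` is the raw sigma and the
    # return value is the denoised prediction, not the raw network output:
    #
    #   x = torch.randn(1, 4, 64, 64)
    #   sigma = torch.tensor([14.6])
    #   context = torch.randn(1, 77, 768)
    #   denoised = m.apply_model(x, sigma, c_crossattn=context)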

    def get_dtype(self) -> torch.dtype:
        """#### Get the data type of the model.

        #### Returns:
            - `torch.dtype`: The data type.
        """
        return self.diffusion_model.dtype

    def encode_adm(self, **kwargs) -> None:
        """#### Encode the ADM (additional conditioning) inputs.

        #### Args:
            - `**kwargs`: Additional keyword arguments.

        #### Returns:
            - `None`: The base model applies no ADM conditioning; subclasses
              override this to return an encoding.
        """
        return None

    def extra_conds(self, **kwargs) -> dict:
        """#### Get the extra conditions.

        #### Args:
            - `**kwargs`: Additional keyword arguments.

        #### Returns:
            - `dict`: The extra conditions.
        """
        out = {}
        adm = self.encode_adm(**kwargs)
        if adm is not None:
            out["y"] = cond.CONDRegular(adm)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out["c_crossattn"] = cond.CONDCrossAttn(cross_attn)
        cross_attn_cnet = kwargs.get("cross_attn_controlnet", None)
        if cross_attn_cnet is not None:
            out["crossattn_controlnet"] = cond.CONDCrossAttn(cross_attn_cnet)
        return out
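
    # Usage sketch (hypothetical tensor): given a cross-attention conditioning
    # tensor, extra_conds wraps it so the sampler can batch and resize it later:
    #
    #   out = m.extra_conds(cross_attn=torch.randn(1, 77, 768))
    #   # -> {"c_crossattn": cond.CONDCrossAttn(<tensor>)}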

    def load_model_weights(self, sd: dict, unet_prefix: str = "") -> "BaseModel":
        """#### Load the model weights.

        #### Args:
            - `sd` (dict): The state dictionary.
            - `unet_prefix` (str, optional): The UNet key prefix to strip. Defaults to "".

        #### Returns:
            - `BaseModel`: The model with loaded weights.
        """
        to_load = {}
        keys = list(sd.keys())
        for k in keys:
            if k.startswith(unet_prefix):
                to_load[k[len(unet_prefix) :]] = sd.pop(k)
        to_load = self.model_config.process_unet_state_dict(to_load)
        m, u = self.diffusion_model.load_state_dict(to_load, strict=False)
        if len(m) > 0:
            logging.warning("unet missing: {}".format(m))
        if len(u) > 0:
            logging.warning("unet unexpected: {}".format(u))
        del to_load
        return self
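
    # Usage sketch (hypothetical checkpoint layout): for a checkpoint that
    # stores UNet weights under "model.diffusion_model.<param>", pass that
    # prefix so it is stripped before loading:
    #
    #   m.load_model_weights(sd, unet_prefix="model.diffusion_model.")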

    def process_latent_in(self, latent: torch.Tensor) -> torch.Tensor:
        """#### Process the latent input.

        #### Args:
            - `latent` (torch.Tensor): The latent tensor.

        #### Returns:
            - `torch.Tensor`: The processed latent tensor.
        """
        return self.latent_format.process_in(latent)

    def process_latent_out(self, latent: torch.Tensor) -> torch.Tensor:
        """#### Process the latent output.

        #### Args:
            - `latent` (torch.Tensor): The latent tensor.

        #### Returns:
            - `torch.Tensor`: The processed latent tensor.
        """
        return self.latent_format.process_out(latent)
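
    # Usage sketch: these two methods are inverses, delegating to the
    # configured latent format so the diffusion model always sees latents in
    # its trained range:
    #
    #   z = m.process_latent_in(vae_latent)        # VAE space -> model space
    #   vae_latent = m.process_latent_out(z)       # model space -> VAE space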

    def memory_required(self, input_shape: tuple) -> float:
        """#### Estimate the memory required to run the model.

        #### Args:
            - `input_shape` (tuple): The input shape, (batch, channels, *spatial).

        #### Returns:
            - `float`: The estimated memory required, in bytes.
        """
        dtype = self.get_dtype()
        if self.manual_cast_dtype is not None:
            dtype = self.manual_cast_dtype
        # TODO: this needs to be tweaked
        area = input_shape[0] * math.prod(input_shape[2:])
        return (area * Device.dtype_size(dtype) * 0.01 * self.memory_usage_factor) * (
            1024 * 1024
        )
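
    # Worked example (hypothetical numbers): for one 512x512 image the latent
    # input shape is (1, 4, 64, 64), so area = 1 * 64 * 64 = 4096. With fp16
    # weights (dtype_size == 2) and the default memory_usage_factor of 2.0:
    #
    #   4096 * 2 * 0.01 * 2.0 * 1024 * 1024 bytes ≈ 164 MiB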


class BASE:
    """#### Base class for model configurations."""

    unet_config = {}
    unet_extra_config = {
        "num_heads": -1,
        "num_head_channels": 64,
    }
    required_keys = {}
    clip_prefix = []
    clip_vision_prefix = None
    noise_aug_config = None
    sampling_settings = {}
    latent_format = Latent.LatentFormat
    vae_key_prefix = ["first_stage_model."]
    text_encoder_key_prefix = ["cond_stage_model."]
    supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32]
    memory_usage_factor = 2.0
    manual_cast_dtype = None
    custom_operations = None

    @classmethod
    def matches(cls, unet_config: dict, state_dict: dict = None) -> bool:
        """#### Check if the UNet configuration matches this model class.

        #### Args:
            - `unet_config` (dict): The UNet configuration.
            - `state_dict` (dict, optional): The state dictionary. Defaults to None.

        #### Returns:
            - `bool`: Whether the configuration matches.
        """
        for k in cls.unet_config:
            if k not in unet_config or cls.unet_config[k] != unet_config[k]:
                return False
        if state_dict is not None:
            for k in cls.required_keys:
                if k not in state_dict:
                    return False
        return True
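
    # Usage sketch (hypothetical subclass): a config subclass pins down the
    # unet_config fingerprint it expects, and matches() returns True only when
    # every declared key is present with the same value:
    #
    #   class MyConfig(BASE):
    #       unet_config = {"context_dim": 768, "model_channels": 320}
    #
    #   MyConfig.matches(detected_unet_config)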

    def model_type(self, state_dict: dict, prefix: str = "") -> sampling.ModelType:
        """#### Get the model type.

        #### Args:
            - `state_dict` (dict): The state dictionary.
            - `prefix` (str, optional): The prefix. Defaults to "".

        #### Returns:
            - `sampling.ModelType`: The model type. Defaults to EPS; subclasses
              override this to detect other objectives.
        """
        return sampling.ModelType.EPS

    def inpaint_model(self) -> bool:
        """#### Check if the model is an inpaint model.

        #### Returns:
            - `bool`: Whether the model is an inpaint model (more than 4 input
              channels implies concatenated mask/masked-image conditioning).
        """
        return self.unet_config["in_channels"] > 4

    def __init__(self, unet_config: dict):
        """#### Initialize the BASE class.

        #### Args:
            - `unet_config` (dict): The UNet configuration.
        """
        self.unet_config = unet_config.copy()
        self.sampling_settings = self.sampling_settings.copy()
        self.latent_format = self.latent_format()
        # Overlay the per-class extra config on top of the detected config
        for x in self.unet_extra_config:
            self.unet_config[x] = self.unet_extra_config[x]

    def get_model(
        self, state_dict: dict, prefix: str = "", device: torch.device = None
    ) -> BaseModel:
        """#### Get the model.

        #### Args:
            - `state_dict` (dict): The state dictionary.
            - `prefix` (str, optional): The prefix. Defaults to "".
            - `device` (torch.device, optional): The device to use. Defaults to None.

        #### Returns:
            - `BaseModel`: The model.
        """
        out = BaseModel(
            self, model_type=self.model_type(state_dict, prefix), device=device
        )
        return out

    def process_unet_state_dict(self, state_dict: dict) -> dict:
        """#### Process the UNet state dictionary.

        #### Args:
            - `state_dict` (dict): The state dictionary.

        #### Returns:
            - `dict`: The processed state dictionary (unchanged by default).
        """
        return state_dict

    def process_vae_state_dict(self, state_dict: dict) -> dict:
        """#### Process the VAE state dictionary.

        #### Args:
            - `state_dict` (dict): The state dictionary.

        #### Returns:
            - `dict`: The processed state dictionary (unchanged by default).
        """
        return state_dict

    def set_inference_dtype(
        self, dtype: torch.dtype, manual_cast_dtype: torch.dtype
    ) -> None:
        """#### Set the inference data type.

        #### Args:
            - `dtype` (torch.dtype): The data type.
            - `manual_cast_dtype` (torch.dtype): The manual cast data type.
        """
        self.unet_config["dtype"] = dtype
        self.manual_cast_dtype = manual_cast_dtype
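

# End-to-end sketch (hypothetical subclass and names, shown for illustration):
# a config subclass pins down a unet_config fingerprint, get_model() builds
# the BaseModel, and load_model_weights() fills it from the checkpoint:
#
#   class MyConfig(BASE):
#       unet_config = {"context_dim": 768, "model_channels": 320}
#
#   config = MyConfig(detected_unet_config)
#   config.set_inference_dtype(torch.float16, manual_cast_dtype=None)
#   model = config.get_model(state_dict, device=torch.device("cuda"))
#   model.load_model_weights(state_dict, unet_prefix="model.diffusion_model.")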