# LightDiffusion-Next/modules/hidiffusion/msw_msa_attention.py
from __future__ import annotations
import itertools
import math
from time import time
from typing import Any, NamedTuple
from modules.Model import ModelPatcher
import torch
from . import utils
from .utils import (
IntegratedNode,
ModelType,
StrEnum,
TimeMode,
block_to_num,
check_time,
convert_time,
get_sigma,
guess_model_type,
logger,
parse_blocks,
rescale_size,
scale_samples,
)
F = torch.nn.functional
SCALE_METHODS = ()
REVERSE_SCALE_METHODS = ()
# Taken from https://github.com/blepping/comfyui_jankhidiffusion
def init_integrations(_integrations) -> None:
"""#### Initialize integrations.
#### Args:
- `_integrations` (Any): The integrations object.
"""
global scale_samples, SCALE_METHODS, REVERSE_SCALE_METHODS # noqa: PLW0603
SCALE_METHODS = ("disabled", "skip", *utils.UPSCALE_METHODS)
REVERSE_SCALE_METHODS = utils.UPSCALE_METHODS
scale_samples = utils.scale_samples
utils.MODULES.register_init_handler(init_integrations)
DEFAULT_WARN_INTERVAL = 60
class Preset(NamedTuple):
"""#### Class representing a preset configuration.
#### Args:
- `input_blocks` (str): The input blocks.
- `middle_blocks` (str): The middle blocks.
- `output_blocks` (str): The output blocks.
- `time_mode` (TimeMode): The time mode.
- `start_time` (float): The start time.
- `end_time` (float): The end time.
- `scale_mode` (str): The scale mode.
- `reverse_scale_mode` (str): The reverse scale mode.
"""
input_blocks: str = ""
middle_blocks: str = ""
output_blocks: str = ""
time_mode: TimeMode = TimeMode.PERCENT
start_time: float = 0.2
end_time: float = 1.0
scale_mode: str = "nearest-exact"
reverse_scale_mode: str = "nearest-exact"
@property
def as_dict(self):
"""#### Convert the preset to a dictionary.
#### Returns:
- `Dict[str, Any]`: The preset as a dictionary.
"""
return {k: getattr(self, k) for k in self._fields}
@property
def pretty_blocks(self):
"""#### Get a pretty string representation of the blocks.
#### Returns:
- `str`: The pretty string representation of the blocks.
"""
blocks = (self.input_blocks, self.middle_blocks, self.output_blocks)
return " / ".join(b or "none" for b in blocks)
SIMPLE_PRESETS = {
ModelType.SD15: Preset(input_blocks="1,2", output_blocks="11,10,9"),
ModelType.SDXL: Preset(input_blocks="4,5", output_blocks="3,4,5"),
}
class WindowSize(NamedTuple):
"""#### Class representing the window size.
#### Args:
- `height` (int): The height of the window.
- `width` (int): The width of the window.
"""
height: int
width: int
@property
def sum(self):
"""#### Get the sum of the height and width.
#### Returns:
- `int`: The sum of the height and width.
"""
return self.height * self.width
def __neg__(self):
"""#### Negate the window size.
#### Returns:
- `WindowSize`: The negated window size.
"""
return self.__class__(-self.height, -self.width)
class ShiftSize(WindowSize):
"""#### Class representing the shift size."""
pass
class LastShiftMode(StrEnum):
"""#### Enum for the last shift mode."""
GLOBAL = "global"
BLOCK = "block"
BOTH = "both"
IGNORE = "ignore"
class LastShiftStrategy(StrEnum):
"""#### Enum for the last shift strategy."""
INCREMENT = "increment"
DECREMENT = "decrement"
RETRY = "retry"
class Config(NamedTuple):
"""#### Class representing the configuration.
#### Args:
- `start_sigma` (float): The start sigma.
- `end_sigma` (float): The end sigma.
- `use_blocks` (set): The blocks to use.
- `scale_mode` (str): The scale mode.
- `reverse_scale_mode` (str): The reverse scale mode.
- `silent` (bool): Whether to disable log warnings.
- `last_shift_mode` (LastShiftMode): The last shift mode.
- `last_shift_strategy` (LastShiftStrategy): The last shift strategy.
- `pre_window_multiplier` (float): The pre-window multiplier.
- `post_window_multiplier` (float): The post-window multiplier.
- `pre_window_reverse_multiplier` (float): The pre-window reverse multiplier.
- `post_window_reverse_multiplier` (float): The post-window reverse multiplier.
- `force_apply_attn2` (bool): Whether to force apply attention 2.
- `rescale_search_tolerance` (int): The rescale search tolerance.
- `verbose` (int): The verbosity level.
"""
start_sigma: float
end_sigma: float
use_blocks: set
scale_mode: str = "nearest-exact"
reverse_scale_mode: str = "nearest-exact"
# Allows disabling the log warning for incompatible sizes.
silent: bool = False
# Mode for trying to avoid using the same window size consecutively.
last_shift_mode: LastShiftMode = LastShiftMode.GLOBAL
# Strategy to use when avoiding a duplicate window size.
last_shift_strategy: LastShiftStrategy = LastShiftStrategy.INCREMENT
# Allows multiplying the tensor going into/out of the window or window reverse effect.
pre_window_multiplier: float = 1.0
post_window_multiplier: float = 1.0
pre_window_reverse_multiplier: float = 1.0
post_window_reverse_multiplier: float = 1.0
force_apply_attn2: bool = False
rescale_search_tolerance: int = 1
verbose: int = 0
@classmethod
def build(
cls,
*,
ms: object,
input_blocks: str | list[int],
middle_blocks: str | list[int],
output_blocks: str | list[int],
time_mode: str | TimeMode,
start_time: float,
end_time: float,
        **kwargs: Any,
    ) -> Config:
"""#### Build a configuration object.
#### Args:
- `ms` (object): The model sampling object.
- `input_blocks` (str | List[int]): The input blocks.
- `middle_blocks` (str | List[int]): The middle blocks.
- `output_blocks` (str | List[int]): The output blocks.
- `time_mode` (str | TimeMode): The time mode.
- `start_time` (float): The start time.
- `end_time` (float): The end time.
- `kwargs` (Dict[str, Any]): Additional keyword arguments.
#### Returns:
- `Config`: The configuration object.
"""
time_mode: TimeMode = TimeMode(time_mode)
start_sigma, end_sigma = convert_time(ms, time_mode, start_time, end_time)
input_blocks, middle_blocks, output_blocks = itertools.starmap(
parse_blocks,
(
("input", input_blocks),
("middle", middle_blocks),
("output", output_blocks),
),
)
        return cls(
            start_sigma=start_sigma,
            end_sigma=end_sigma,
            use_blocks=input_blocks | middle_blocks | output_blocks,
            **kwargs,
        )
@staticmethod
def maybe_multiply(
t: torch.Tensor,
multiplier: float = 1.0,
post: bool = False,
) -> torch.Tensor:
"""#### Multiply a tensor by a multiplier.
#### Args:
- `t` (torch.Tensor): The input tensor.
- `multiplier` (float, optional): The multiplier. Defaults to 1.0.
- `post` (bool, optional): Whether to multiply in-place. Defaults to False.
#### Returns:
- `torch.Tensor`: The multiplied tensor.
"""
if multiplier == 1.0:
return t
return t.mul_(multiplier) if post else t * multiplier
class State:
"""#### Class representing the state.
#### Args:
- `config` (Config): The configuration object.
"""
__slots__ = (
"config",
"last_block",
"last_shift",
"last_shifts",
"last_sigma",
"last_warned",
"window_args",
)
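    # window_args caches the (window_size, shift_size, height, width) tuples
    # computed in attn_patch (one entry per q/k/v tensor) for consumption by
    # the matching attn_output_patch call, which clears it afterwards.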
def __init__(self, config):
self.config = config
self.last_warned = None
self.reset()
def reset(self):
"""#### Reset the state."""
self.window_args = None
self.last_sigma = None
self.last_block = None
self.last_shift = None
self.last_shifts = {}
@property
def pretty_last_block(self) -> str:
"""#### Get a pretty string representation of the last block.
#### Returns:
- `str`: The pretty string representation of the last block.
"""
if self.last_block is None:
return "unknown"
bt, bnum = self.last_block
attstr = "" if not self.config.force_apply_attn2 else "attn2."
btstr = ("in", "mid", "out")[bt]
return f"{attstr}{btstr}.{bnum}"
def maybe_warning(self, s):
"""#### Log a warning if necessary.
#### Args:
- `s` (str): The warning message.
"""
if self.config.silent:
return
now = time()
if (
self.config.verbose >= 2
or self.last_warned is None
or now - self.last_warned >= DEFAULT_WARN_INTERVAL
):
logger.warning(
f"** jankhidiffusion: MSW-MSA attention({self.pretty_last_block}): {s}",
)
self.last_warned = now
def __repr__(self):
"""#### Get a string representation of the state.
#### Returns:
- `str`: The string representation of the state.
"""
return f"<MSWMSAAttentionState:last_sigma={self.last_sigma}, last_block={self.pretty_last_block}, last_shift={self.last_shift}, last_shifts={self.last_shifts}>"
class ApplyMSWMSAAttention(metaclass=IntegratedNode):
"""#### Class for applying MSW-MSA attention."""
RETURN_TYPES = ("MODEL",)
OUTPUT_TOOLTIPS = ("Model patched with the MSW-MSA attention effect.",)
FUNCTION = "patch"
CATEGORY = "model_patches/unet"
DESCRIPTION = "This node applies an attention patch which _may_ slightly improve quality especially when generating at high resolutions. It is a large performance increase on SD1.x, may improve performance on SDXL. This is the advanced version of the node with more parameters, use ApplyMSWMSAAttentionSimple if this seems too complex. NOTE: Only supports SD1.x, SD2.x and SDXL."
@classmethod
def INPUT_TYPES(cls):
"""#### Get the input types for the class.
#### Returns:
- `Dict[str, Any]`: The input types.
"""
return {
"required": {
"input_blocks": (
"STRING",
{
"default": "1,2",
"tooltip": "Comma-separated list of input blocks to patch. Default is for SD1.x, you can try 4,5 for SDXL",
},
),
"middle_blocks": (
"STRING",
{
"default": "",
"tooltip": "Comma-separated list of middle blocks to patch. Generally not recommended.",
},
),
"output_blocks": (
"STRING",
{
"default": "9,10,11",
"tooltip": "Comma-separated list of output blocks to patch. Default is for SD1.x, you can try 3,4,5 for SDXL",
},
),
"time_mode": (
tuple(str(val) for val in TimeMode),
{
"default": "percent",
"tooltip": "Time mode controls how to interpret the values in start_time and end_time.",
},
),
"start_time": (
"FLOAT",
{
"default": 0.0,
"min": 0.0,
"max": 999.0,
"round": False,
"step": 0.01,
"tooltip": "Time the MSW-MSA attention effect starts applying - value is inclusive.",
},
),
"end_time": (
"FLOAT",
{
"default": 1.0,
"min": 0.0,
"max": 999.0,
"round": False,
"step": 0.01,
"tooltip": "Time the MSW-MSA attention effect ends - value is inclusive.",
},
),
"model": (
"MODEL",
{
"tooltip": "Model to patch with the MSW-MSA attention effect.",
},
),
},
"optional": {
"yaml_parameters": (
"STRING",
{
"tooltip": "Allows specifying custom parameters via YAML. You can also override any of the normal parameters by key. This input can be converted into a multiline text widget. See main README for possible options. Note: When specifying paramaters this way, there is very little error checking.",
"dynamicPrompts": False,
"multiline": True,
"defaultInput": True,
},
),
},
}
# reference: https://github.com/microsoft/Swin-Transformer
# Window functions adapted from https://github.com/megvii-research/HiDiffusion
@staticmethod
def window_partition(
x: torch.Tensor,
state: State,
window_index: int,
) -> torch.Tensor:
"""#### Partition a tensor into windows.
#### Args:
- `x` (torch.Tensor): The input tensor.
- `state` (State): The state object.
- `window_index` (int): The window index.
#### Returns:
- `torch.Tensor`: The partitioned tensor.
"""
config = state.config
scale_mode = config.scale_mode
x = config.maybe_multiply(x, config.pre_window_multiplier)
window_size, shift_size, height, width = state.window_args[window_index]
do_rescale = (height % 2 + width % 2) != 0
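        # Odd height or width cannot be split into an even 2x2 window grid, so
        # we either skip, proceed anyway, or rescale to an even size below.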
if do_rescale:
if scale_mode == "skip":
state.maybe_warning(
"Incompatible latent size - skipping MSW-MSA attention.",
)
return x
if scale_mode == "disabled":
state.maybe_warning(
"Incompatible latent size - trying to proceed anyway. This may result in an error.",
)
do_rescale = False
else:
state.maybe_warning(
"Incompatible latent size - applying scaling workaround. Note: This may reduce quality - use resolutions that are multiples of 64 when possible.",
)
batch, _features, channels = x.shape
wheight, wwidth = window_size
x = x.view(batch, height, width, channels)
if do_rescale:
x = (
scale_samples(
x.permute(0, 3, 1, 2).contiguous(),
wwidth * 2,
wheight * 2,
mode=scale_mode,
sigma=state.last_sigma,
)
.permute(0, 2, 3, 1)
.contiguous()
)
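        # Cyclic shift (the "SW" in MSW-MSA): roll the feature map so that
        # window boundaries land in different places across attention calls.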
if shift_size.sum > 0:
x = torch.roll(x, shifts=-shift_size, dims=(1, 2))
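        # Split the map into a 2x2 grid of windows; attention then operates
        # within each (wheight, wwidth) window instead of the full feature map.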
x = x.view(batch, 2, wheight, 2, wwidth, channels)
windows = (
x.permute(0, 1, 3, 2, 4, 5)
.contiguous()
.view(-1, window_size.height, window_size.width, channels)
)
return config.maybe_multiply(
windows.view(-1, window_size.sum, channels),
config.post_window_multiplier,
)
@staticmethod
def window_reverse(
windows: torch.Tensor,
state: State,
window_index: int = 0,
) -> torch.Tensor:
"""#### Reverse the window partitioning of a tensor.
#### Args:
- `windows` (torch.Tensor): The input windows tensor.
- `state` (State): The state object.
- `window_index` (int, optional): The window index. Defaults to 0.
#### Returns:
- `torch.Tensor`: The reversed tensor.
"""
config = state.config
windows = config.maybe_multiply(windows, config.pre_window_reverse_multiplier)
window_size, shift_size, height, width = state.window_args[window_index]
do_rescale = (height % 2 + width % 2) != 0
if do_rescale:
if config.scale_mode == "skip":
return windows
if config.scale_mode == "disabled":
do_rescale = False
batch, _features, channels = windows.shape
wheight, wwidth = window_size
windows = windows.view(-1, wheight, wwidth, channels)
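        # window_partition produced a 2x2 grid of windows per sample, so the
        # original batch size is a quarter of the current leading dimension.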
        batch = windows.shape[0] // 4
x = windows.view(batch, 2, 2, wheight, wwidth, -1)
x = (
x.permute(0, 1, 3, 2, 4, 5)
.contiguous()
.view(batch, wheight * 2, wwidth * 2, -1)
)
if shift_size.sum > 0:
x = torch.roll(x, shifts=shift_size, dims=(1, 2))
if do_rescale:
x = (
scale_samples(
x.permute(0, 3, 1, 2).contiguous(),
width,
height,
mode=config.reverse_scale_mode,
sigma=state.last_sigma,
)
.permute(0, 2, 3, 1)
.contiguous()
)
return config.maybe_multiply(
x.view(batch, height * width, channels),
config.post_window_reverse_multiplier,
)
@staticmethod
def get_window_args(
config: Config,
n: torch.Tensor,
orig_shape: tuple,
shift: int,
) -> tuple[WindowSize, ShiftSize, int, int]:
"""#### Get window arguments for MSW-MSA attention.
#### Args:
- `config` (Config): The configuration object.
- `n` (torch.Tensor): The input tensor.
- `orig_shape` (tuple): The original shape of the tensor.
- `shift` (int): The shift value.
#### Returns:
- `tuple[WindowSize, ShiftSize, int, int]`: The window size, shift size, height, and width.
"""
_batch, features, _channels = n.shape
orig_height, orig_width = orig_shape[-2:]
width, height = rescale_size(
orig_width,
orig_height,
features,
tolerance=config.rescale_search_tolerance,
)
# if (height, width) != (orig_height, orig_width):
# print(
# f"\nRESC: features={features}, orig={(orig_height, orig_width)}, new={(height, width)}",
# )
wheight, wwidth = math.ceil(height / 2), math.ceil(width / 2)
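        # The shift index selects a cyclic offset in quarter-window steps:
        # 0 = none, 1 = 1/4, 2 = 2/4, 3 = 3/4 of the half-size window.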
if shift == 0:
shift_size = ShiftSize(0, 0)
elif shift == 1:
shift_size = ShiftSize(wheight // 4, wwidth // 4)
elif shift == 2:
shift_size = ShiftSize(wheight // 4 * 2, wwidth // 4 * 2)
else:
shift_size = ShiftSize(wheight // 4 * 3, wwidth // 4 * 3)
return (WindowSize(wheight, wwidth), shift_size, height, width)
@staticmethod
def get_shift(
curr_block: tuple,
state: State,
*,
shift_count=4,
) -> int:
"""#### Get the shift value for MSW-MSA attention.
#### Args:
- `curr_block` (tuple): The current block.
- `state` (State): The state object.
- `shift_count` (int, optional): The shift count. Defaults to 4.
#### Returns:
- `int`: The shift value.
"""
mode = state.config.last_shift_mode
strat = state.config.last_shift_strategy
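        # Draw a random shift, then (depending on the configured mode) re-pick
        # it so the same shift is not reused back to back for this block,
        # globally, or both.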
shift = int(torch.rand(1, device="cpu").item() * shift_count)
block_last_shift = state.last_shifts.get(curr_block)
last_shift = state.last_shift
if mode == LastShiftMode.BOTH:
avoid = {block_last_shift, last_shift}
elif mode == LastShiftMode.BLOCK:
avoid = {block_last_shift}
elif mode == LastShiftMode.GLOBAL:
avoid = {last_shift}
else:
            avoid = set()
if shift in avoid:
if strat == LastShiftStrategy.DECREMENT:
while shift in avoid:
shift -= 1
if shift < 0:
shift = shift_count - 1
elif strat == LastShiftStrategy.RETRY:
while shift in avoid:
shift = int(torch.rand(1, device="cpu").item() * shift_count)
else:
# Increment
while shift in avoid:
shift = (shift + 1) % shift_count
return shift
@classmethod
def patch(
cls,
*,
model: ModelPatcher.ModelPatcher,
yaml_parameters: str | None = None,
        **kwargs: Any,
) -> tuple[ModelPatcher.ModelPatcher]:
"""#### Patch the model with MSW-MSA attention.
#### Args:
- `model` (ModelPatcher.ModelPatcher): The model patcher.
- `yaml_parameters` (str | None, optional): The YAML parameters. Defaults to None.
- `kwargs` (dict[str, Any]): Additional keyword arguments.
#### Returns:
- `tuple[ModelPatcher.ModelPatcher]`: The patched model.
"""
if yaml_parameters:
import yaml # noqa: PLC0415
extra_params = yaml.safe_load(yaml_parameters)
if extra_params is None:
pass
elif not isinstance(extra_params, dict):
raise ValueError(
"MSWMSAAttention: yaml_parameters must either be null or an object",
)
else:
kwargs |= extra_params
config = Config.build(
ms=model.get_model_object("model_sampling"),
**kwargs,
)
if not config.use_blocks:
return (model,)
if config.verbose:
logger.info(
f"** jankhidiffusion: MSW-MSA Attention: Using config: {config}",
)
model = model.clone()
state = State(config)
def attn_patch(
q: torch.Tensor,
k: torch.Tensor,
v: torch.Tensor,
extra_options: dict,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""#### Apply attention patch.
#### Args:
- `q` (torch.Tensor): The query tensor.
- `k` (torch.Tensor): The key tensor.
- `v` (torch.Tensor): The value tensor.
- `extra_options` (dict): Additional options.
#### Returns:
- `tuple[torch.Tensor, torch.Tensor, torch.Tensor]`: The patched tensors.
"""
state.window_args = None
sigma = get_sigma(extra_options)
block = extra_options.get("block", ("missing", 0))
curr_block = block_to_num(*block)
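            # Sigma increasing between calls means a new sampling run has
            # started, so any per-run state is stale and must be discarded.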
if state.last_sigma is not None and sigma > state.last_sigma:
# logging.warning(
# f"Doing reset: block={block}, sigma={sigma}, state={state}",
# )
state.reset()
state.last_block = curr_block
state.last_sigma = sigma
if block not in config.use_blocks or not check_time(
sigma,
config.start_sigma,
config.end_sigma,
):
return q, k, v
orig_shape = extra_options["original_shape"]
# MSW-MSA
shift = cls.get_shift(curr_block, state)
state.last_shifts[curr_block] = state.last_shift = shift
try:
# get_window_args() can fail with ValueError in rescale_size() for some weird resolutions/aspect ratios
# so we catch it here and skip MSW-MSA attention in that case.
state.window_args = tuple(
cls.get_window_args(config, x, orig_shape, shift)
if x is not None
else None
for x in (q, k, v)
)
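            # Self-attention usually passes the identical tensor as q, k and v;
            # in that case partition it once and fan the result out on return.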
attn_parts = (q,) if q is not None and q is k and q is v else (q, k, v)
result = tuple(
cls.window_partition(tensor, state, idx)
if tensor is not None
else None
for idx, tensor in enumerate(attn_parts)
)
except (RuntimeError, ValueError) as exc:
logger.warning(
f"** jankhidiffusion: Exception applying MSW-MSA attention: Incompatible model patches or bad resolution. Try using resolutions that are multiples of 64 or set scale/reverse_scale modes to something other than disabled. Original exception: {exc}",
)
state.window_args = None
return q, k, v
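            # A single-element result means q/k/v were identical, so the same
            # partitioned tensor is returned for all three.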
return result * 3 if len(result) == 1 else result
def attn_output_patch(n: torch.Tensor, extra_options: dict) -> torch.Tensor:
"""#### Apply attention output patch.
#### Args:
- `n` (torch.Tensor): The input tensor.
- `extra_options` (dict): Additional options.
#### Returns:
- `torch.Tensor`: The patched tensor.
"""
if state.window_args is None or state.last_block != block_to_num(
*extra_options.get("block", ("missing", 0)),
):
state.window_args = None
return n
result = cls.window_reverse(n, state)
state.window_args = None
return result
if not config.force_apply_attn2:
model.set_model_attn1_patch(attn_patch)
model.set_model_attn1_output_patch(attn_output_patch)
else:
model.set_model_attn2_patch(attn_patch)
model.set_model_attn2_output_patch(attn_output_patch)
return (model,)
class ApplyMSWMSAAttentionSimple(metaclass=IntegratedNode):
"""Class representing a simplified version of MSW-MSA Attention."""
RETURN_TYPES = ("MODEL",)
OUTPUT_TOOLTIPS = ("Model patched with the MSW-MSA attention effect.",)
FUNCTION = "go"
CATEGORY = "model_patches/unet"
DESCRIPTION = "This node applies an attention patch which _may_ slightly improve quality especially when generating at high resolutions. It is a large performance increase on SD1.x, may improve performance on SDXL. This is the simplified version of the node with less parameters. Use ApplyMSWMSAAttention if you require more control. NOTE: Only supports SD1.x, SD2.x and SDXL."
@classmethod
def INPUT_TYPES(cls) -> dict:
"""#### Get input types for the class.
#### Returns:
- `dict`: The input types.
"""
return {
"required": {
"model_type": (
("auto", "SD15", "SDXL"),
{
"tooltip": "Model type being patched. Generally safe to leave on auto. Choose SD15 for SD 1.4, SD 2.x.",
},
),
"model": (
"MODEL",
{
"tooltip": "Model to patch with the MSW-MSA attention effect.",
},
),
},
}
@classmethod
def go(
cls,
model_type: str | ModelType,
model: ModelPatcher.ModelPatcher,
) -> tuple[ModelPatcher.ModelPatcher]:
"""#### Apply the MSW-MSA attention patch.
#### Args:
- `model_type` (str | ModelType): The model type.
- `model` (ModelPatcher.ModelPatcher): The model patcher.
#### Returns:
- `tuple[ModelPatcher.ModelPatcher]`: The patched model.
"""
if model_type == "auto":
guessed_model_type = guess_model_type(model)
if guessed_model_type not in SIMPLE_PRESETS:
raise RuntimeError("Unable to guess model type")
model_type = guessed_model_type
else:
model_type = ModelType(model_type)
preset = SIMPLE_PRESETS.get(model_type)
if preset is None:
errstr = f"Unknown model type {model_type!s}"
raise ValueError(errstr)
logger.info(
f"** ApplyMSWMSAAttentionSimple: Using preset {model_type!s}: in/mid/out blocks [{preset.pretty_blocks}], start/end percent {preset.start_time:.2}/{preset.end_time:.2}",
)
return ApplyMSWMSAAttention.patch(model=model, **preset.as_dict)
__all__ = ("ApplyMSWMSAAttention", "ApplyMSWMSAAttentionSimple")
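
# Example usage (a sketch only, not part of the module's public surface). Both
# nodes return a 1-tuple containing the patched model; `model` is assumed to
# be an already-loaded ModelPatcher instance obtained elsewhere:
#
#     patched, = ApplyMSWMSAAttentionSimple.go(model_type="auto", model=model)
#
# or, with explicit control over the blocks and schedule:
#
#     patched, = ApplyMSWMSAAttention.patch(
#         model=model,
#         input_blocks="1,2",
#         middle_blocks="",
#         output_blocks="9,10,11",
#         time_mode="percent",
#         start_time=0.2,
#         end_time=1.0,
#     )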