import argparse
import os
import random
import sys

import numpy as np
import torch
from PIL import Image

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))

from modules.AutoDetailer import SAM, SEGS, ADetailer, bbox
from modules.AutoEncoders import VariationalAE
from modules.AutoHDR import ahdr
from modules.clip import Clip
from modules.FileManaging import Downloader, ImageSaver, Loader
from modules.hidiffusion import msw_msa_attention
from modules.Model import LoRas
from modules.Quantize import Quantizer
from modules.sample import sampling
from modules.UltimateSDUpscale import UltimateSDUpscale, USDU_upscaler
from modules.Utilities import Enhancer, Latent, upscale
from modules.WaveSpeed import fbcache_nodes
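
# Restore the last seed used and make sure the required model files are present.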
with open(os.path.join("./_internal/", "last_seed.txt"), "r") as f:
    last_seed = int(f.read())

Downloader.CheckAndDownload()


def pipeline(
    prompt: str,
    w: int,
    h: int,
    number: int = 1,
    batch: int = 1,
    hires_fix: bool = False,
    adetailer: bool = False,
    enhance_prompt: bool = False,
    img2img: bool = False,
    stable_fast: bool = False,
    reuse_seed: bool = False,
    flux_enabled: bool = False,
    prio_speed: bool = False,
    autohdr: bool = False,
    realistic_model: bool = False,
) -> None:
"""#### Run the LightDiffusion pipeline. | |
#### Args: | |
- `prompt` (str): The prompt for the pipeline. | |
- `w` (int): The width of the generated image. | |
- `h` (int): The height of the generated image. | |
- `hires_fix` (bool, optional): Enable high-resolution fix. Defaults to False. | |
- `adetailer` (bool, optional): Enable automatic face and body enhancing. Defaults to False. | |
- `enhance_prompt` (bool, optional): Enable Ollama prompt enhancement. Defaults to False. | |
- `img2img` (bool, optional): Use LightDiffusion in Image to Image mode, the prompt input becomes the path to the input image. Defaults to False. | |
- `stable_fast` (bool, optional): Enable Stable-Fast speedup offering a 70% speed improvement in return of a compilation time. Defaults to False. | |
- `reuse_seed` (bool, optional): Reuse the last used seed, if False the seed will be kept random. Default to False. | |
- `flux_enabled` (bool, optional): Enable the flux mode. Defaults to False. | |
- `prio_speed` (bool, optional): Prioritize speed over quality. Defaults to False. | |
- `autohdr` (bool, optional): Enable the AutoHDR mode. Defaults to False. | |
- `realistic_model` (bool, optional): Use the realistic model. Defaults to False. | |
""" | |
    global last_seed
    if reuse_seed:
        seed = last_seed
    else:
        seed = random.randint(1, 2**64)
        last_seed = seed
        with open(os.path.join("./_internal/", "last_seed.txt"), "w") as f:
            f.write(str(seed))
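    # Optionally rewrite the prompt with the Ollama-based enhancer.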
    if enhance_prompt:
        try:
            prompt = Enhancer.enhance_prompt(prompt)
        except Exception:
            # Fall back to the original prompt if enhancement fails.
            pass
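    # Choose a faster sampler when speed is prioritized, and pick the checkpoint.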
    sampler_name = "dpmpp_2m_cfgpp" if prio_speed else "dpmpp_sde_cfgpp"
    ckpt = (
        "./_internal/checkpoints/Meina V10 - baked VAE.safetensors"
        if not realistic_model
        else "./_internal/checkpoints/DreamShaper_8_pruned.safetensors"
    )
    with torch.inference_mode():
        # The SD checkpoint is only needed outside of Flux mode; the helper
        # nodes below are shared by every branch.
        if not flux_enabled:
            checkpointloadersimple = Loader.CheckpointLoaderSimple()
            checkpointloadersimple_241 = checkpointloadersimple.load_checkpoint(
                ckpt_name=ckpt
            )
        hidiffoptimizer = msw_msa_attention.ApplyMSWMSAAttentionSimple()
        cliptextencode = Clip.CLIPTextEncode()
        emptylatentimage = Latent.EmptyLatentImage()
        ksampler_instance = sampling.KSampler()
        vaedecode = VariationalAE.VAEDecode()
        saveimage = ImageSaver.SaveImage()
        latent_upscale = upscale.LatentUpscale()
        hdr = ahdr.HDREffects()
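        # Run the pipeline `number` times, generating `batch` images per run.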
        for _ in range(number):
            if img2img:
                # Image-to-image: `prompt` is the path to the input image,
                # normalized to a [0, 1] float tensor.
                img = Image.open(prompt)
                img_array = np.array(img)
                img_tensor = torch.from_numpy(img_array).float().to("cpu") / 255.0
                img_tensor = img_tensor.unsqueeze(0)
                with torch.inference_mode():
                    ultimatesdupscale = UltimateSDUpscale.UltimateSDUpscale()
                    # Apply the add-detail LoRA when available; fall back to
                    # the bare checkpoint otherwise.
                    try:
                        loraloader = LoRas.LoraLoader()
                        loraloader_274 = loraloader.load_lora(
                            lora_name="add_detail.safetensors",
                            strength_model=2,
                            strength_clip=2,
                            model=checkpointloadersimple_241[0],
                            clip=checkpointloadersimple_241[1],
                        )
                    except Exception:
                        loraloader_274 = checkpointloadersimple_241
                    if stable_fast:
                        from modules.StableFast import StableFast

                        applystablefast = StableFast.ApplyStableFastUnet()
                        applystablefast_158 = applystablefast.apply_stable_fast(
                            enable_cuda_graph=False,
                            model=loraloader_274[0],
                        )
                    else:
                        applystablefast_158 = loraloader_274
                    clipsetlastlayer = Clip.CLIPSetLastLayer()
                    clipsetlastlayer_257 = clipsetlastlayer.set_last_layer(
                        stop_at_clip_layer=-2, clip=loraloader_274[1]
                    )
                    cliptextencode_242 = cliptextencode.encode(
                        text=prompt,
                        clip=clipsetlastlayer_257[0],
                    )
                    cliptextencode_243 = cliptextencode.encode(
                        text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)",
                        clip=clipsetlastlayer_257[0],
                    )
                    upscalemodelloader = USDU_upscaler.UpscaleModelLoader()
                    upscalemodelloader_244 = upscalemodelloader.load_model(
                        "RealESRGAN_x4plus.pth"
                    )
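                    # Tiled upscale: RealESRGAN for the raw 2x enlargement, then
                    # a low-denoise (0.3) sampling pass over each tile to refine it.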
                    ultimatesdupscale_250 = ultimatesdupscale.upscale(
                        upscale_by=2,
                        seed=random.randint(1, 2**64),
                        steps=8,
                        cfg=6,
                        sampler_name=sampler_name,
                        scheduler="karras",
                        denoise=0.3,
                        mode_type="Linear",
                        tile_width=512,
                        tile_height=512,
                        mask_blur=16,
                        tile_padding=32,
                        seam_fix_mode="Half Tile",
                        seam_fix_denoise=0.2,
                        seam_fix_width=64,
                        seam_fix_mask_blur=16,
                        seam_fix_padding=32,
                        force_uniform_tiles="enable",
                        image=img_tensor,
                        model=applystablefast_158[0],
                        positive=cliptextencode_242[0],
                        negative=cliptextencode_243[0],
                        vae=checkpointloadersimple_241[2],
                        upscale_model=upscalemodelloader_244[0],
                        pipeline=True,
                    )
                    saveimage.save_images(
                        filename_prefix="LD-I2I",
                        images=hdr.apply_hdr2(ultimatesdupscale_250[0])
                        if autohdr
                        else ultimatesdupscale_250[0],
                    )
            elif flux_enabled:
                Downloader.CheckAndDownloadFlux()
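                # Flux path: GGUF-quantized UNet and T5 encoder keep VRAM usage low.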
                with torch.inference_mode():
                    dualcliploadergguf = Quantizer.DualCLIPLoaderGGUF()
                    emptylatentimage = Latent.EmptyLatentImage()
                    vaeloader = VariationalAE.VAELoader()
                    unetloadergguf = Quantizer.UnetLoaderGGUF()
                    cliptextencodeflux = Quantizer.CLIPTextEncodeFlux()
                    conditioningzeroout = Quantizer.ConditioningZeroOut()
                    ksampler = sampling.KSampler()
                    unetloadergguf_10 = unetloadergguf.load_unet(
                        unet_name="flux1-dev-Q8_0.gguf"
                    )
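                    # Patch the UNet with WaveSpeed's first-block cache to skip
                    # redundant computation across diffusion steps.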
                    fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
                    unetloadergguf_10 = fb_cache.patch(
                        unetloadergguf_10, "diffusion_model", 0.120
                    )
                    vaeloader_11 = vaeloader.load_vae(vae_name="ae.safetensors")
                    dualcliploadergguf_19 = dualcliploadergguf.load_clip(
                        clip_name1="clip_l.safetensors",
                        clip_name2="t5-v1_1-xxl-encoder-Q8_0.gguf",
                        type="flux",
                    )
                    emptylatentimage_5 = emptylatentimage.generate(
                        width=w, height=h, batch_size=batch
                    )
                    cliptextencodeflux_15 = cliptextencodeflux.encode(
                        clip_l=prompt,
                        t5xxl=prompt,
                        guidance=3.0,
                        clip=dualcliploadergguf_19[0],
                        flux_enabled=True,
                    )
                    # Flux-dev is guidance-distilled (cfg=1 below), so the
                    # negative conditioning is simply zeroed out.
                    conditioningzeroout_16 = conditioningzeroout.zero_out(
                        conditioning=cliptextencodeflux_15[0]
                    )
                    ksampler_3 = ksampler.sample(
                        seed=random.randint(1, 2**64),
                        steps=20,
                        cfg=1,
                        sampler_name="euler_cfgpp",
                        scheduler="beta",
                        denoise=1,
                        model=unetloadergguf_10[0],
                        positive=cliptextencodeflux_15[0],
                        negative=conditioningzeroout_16[0],
                        latent_image=emptylatentimage_5[0],
                        pipeline=True,
                        flux=True,
                    )
                    vaedecode_8 = vaedecode.decode(
                        samples=ksampler_3[0],
                        vae=vaeloader_11[0],
                        flux=True,
                    )
                    saveimage.save_images(
                        filename_prefix="LD-Flux",
                        images=hdr.apply_hdr2(vaedecode_8[0])
                        if autohdr
                        else vaedecode_8[0],
                    )
            else:
                # Text-to-image: do not proceed without a prompt.
                while prompt is None:
                    pass
                with torch.inference_mode():
                    try:
                        loraloader = LoRas.LoraLoader()
                        loraloader_274 = loraloader.load_lora(
                            lora_name="add_detail.safetensors",
                            strength_model=0.7,
                            strength_clip=0.7,
                            model=checkpointloadersimple_241[0],
                            clip=checkpointloadersimple_241[1],
                        )
                        print("loading add_detail.safetensors")
                    except Exception:
                        loraloader_274 = checkpointloadersimple_241
                    clipsetlastlayer = Clip.CLIPSetLastLayer()
                    clipsetlastlayer_257 = clipsetlastlayer.set_last_layer(
                        stop_at_clip_layer=-2, clip=loraloader_274[1]
                    )
                    cliptextencode_242 = cliptextencode.encode(
                        text=prompt,
                        clip=clipsetlastlayer_257[0],
                    )
                    cliptextencode_243 = cliptextencode.encode(
                        text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)",
                        clip=clipsetlastlayer_257[0],
                    )
                    emptylatentimage_244 = emptylatentimage.generate(
                        width=w, height=h, batch_size=batch
                    )
                    if stable_fast:
                        from modules.StableFast import StableFast

                        applystablefast = StableFast.ApplyStableFastUnet()
                        applystablefast_158 = applystablefast.apply_stable_fast(
                            enable_cuda_graph=False,
                            model=loraloader_274[0],
                        )
                    else:
                        applystablefast_158 = loraloader_274
                    # fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
                    # applystablefast_158 = fb_cache.patch(
                    #     applystablefast_158, "diffusion_model", 0.120
                    # )
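                    # First sampling pass, with the HiDiffusion MSW-MSA attention
                    # patch applied to the model.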
                    ksampler_239 = ksampler_instance.sample(
                        seed=seed,
                        steps=20,
                        cfg=7,
                        sampler_name=sampler_name,
                        scheduler="karras",
                        denoise=1,
                        pipeline=True,
                        model=hidiffoptimizer.go(
                            model_type="auto", model=applystablefast_158[0]
                        )[0],
                        positive=cliptextencode_242[0],
                        negative=cliptextencode_243[0],
                        latent_image=emptylatentimage_244[0],
                    )
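                    # High-resolution fix: 2x latent upscale followed by a second,
                    # low-denoise sampling pass.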
                    if hires_fix:
                        latentupscale_254 = latent_upscale.upscale(
                            width=w * 2,
                            height=h * 2,
                            samples=ksampler_239[0],
                        )
                        ksampler_253 = ksampler_instance.sample(
                            seed=random.randint(1, 2**64),
                            steps=10,
                            cfg=8,
                            sampler_name="euler_ancestral_cfgpp",
                            scheduler="normal",
                            denoise=0.45,
                            model=hidiffoptimizer.go(
                                model_type="auto", model=applystablefast_158[0]
                            )[0],
                            positive=cliptextencode_242[0],
                            negative=cliptextencode_243[0],
                            latent_image=latentupscale_254[0],
                            pipeline=True,
                        )
                    else:
                        ksampler_253 = ksampler_239
                    vaedecode_240 = vaedecode.decode(
                        samples=ksampler_253[0],
                        vae=checkpointloadersimple_241[2],
                    )
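                # ADetailer: two refinement passes, first over detected bodies,
                # then over detected faces.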
                if adetailer:
                    with torch.inference_mode():
                        samloader = SAM.SAMLoader()
                        samloader_87 = samloader.load_model(
                            model_name="sam_vit_b_01ec64.pth", device_mode="AUTO"
                        )
                        cliptextencode_124 = cliptextencode.encode(
                            text="royal, detailed, magnificent, beautiful, seducing",
                            clip=loraloader_274[1],
                        )
                        ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
                        ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
                            # model_name="face_yolov8m.pt"
                            model_name="person_yolov8m-seg.pt"
                        )
                        bboxdetectorsegs = bbox.BboxDetectorForEach()
                        samdetectorcombined = SAM.SAMDetectorCombined()
                        impactsegsandmask = SEGS.SegsBitwiseAndMask()
                        detailerforeachdebug = ADetailer.DetailerForEachTest()
                        bboxdetectorsegs_132 = bboxdetectorsegs.doit(
                            threshold=0.5,
                            dilation=10,
                            crop_factor=2,
                            drop_size=10,
                            labels="all",
                            bbox_detector=ultralyticsdetectorprovider_151[0],
                            image=vaedecode_240[0],
                        )
                        samdetectorcombined_139 = samdetectorcombined.doit(
                            detection_hint="center-1",
                            dilation=0,
                            threshold=0.93,
                            bbox_expansion=0,
                            mask_hint_threshold=0.7,
                            mask_hint_use_negative="False",
                            sam_model=samloader_87[0],
                            segs=bboxdetectorsegs_132,
                            image=vaedecode_240[0],
                        )
                        if samdetectorcombined_139 is None:
                            return
                        impactsegsandmask_152 = impactsegsandmask.doit(
                            segs=bboxdetectorsegs_132,
                            mask=samdetectorcombined_139[0],
                        )
                        detailerforeachdebug_145 = detailerforeachdebug.doit(
                            guide_size=512,
                            guide_size_for=False,
                            max_size=768,
                            seed=random.randint(1, 2**64),
                            steps=20,
                            cfg=6.5,
                            sampler_name=sampler_name,
                            scheduler="karras",
                            denoise=0.5,
                            feather=5,
                            noise_mask=True,
                            force_inpaint=True,
                            wildcard="",
                            cycle=1,
                            inpaint_model=False,
                            noise_mask_feather=20,
                            image=vaedecode_240[0],
                            segs=impactsegsandmask_152[0],
                            model=applystablefast_158[0],
                            clip=checkpointloadersimple_241[1],
                            vae=checkpointloadersimple_241[2],
                            positive=cliptextencode_124[0],
                            negative=cliptextencode_243[0],
                            pipeline=True,
                        )
                        saveimage.save_images(
                            filename_prefix="LD-body",
                            images=hdr.apply_hdr2(detailerforeachdebug_145[0])
                            if autohdr
                            else detailerforeachdebug_145[0],
                        )
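                        # Second pass: detect and refine faces on the body-refined image.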
                        ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
                        ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
                            model_name="face_yolov9c.pt"
                        )
                        bboxdetectorsegs_132 = bboxdetectorsegs.doit(
                            threshold=0.5,
                            dilation=10,
                            crop_factor=2,
                            drop_size=10,
                            labels="all",
                            bbox_detector=ultralyticsdetectorprovider_151[0],
                            image=detailerforeachdebug_145[0],
                        )
                        samdetectorcombined_139 = samdetectorcombined.doit(
                            detection_hint="center-1",
                            dilation=0,
                            threshold=0.93,
                            bbox_expansion=0,
                            mask_hint_threshold=0.7,
                            mask_hint_use_negative="False",
                            sam_model=samloader_87[0],
                            segs=bboxdetectorsegs_132,
                            image=detailerforeachdebug_145[0],
                        )
                        impactsegsandmask_152 = impactsegsandmask.doit(
                            segs=bboxdetectorsegs_132,
                            mask=samdetectorcombined_139[0],
                        )
                        detailerforeachdebug_145 = detailerforeachdebug.doit(
                            guide_size=512,
                            guide_size_for=False,
                            max_size=768,
                            seed=random.randint(1, 2**64),
                            steps=20,
                            cfg=6.5,
                            sampler_name=sampler_name,
                            scheduler="karras",
                            denoise=0.5,
                            feather=5,
                            noise_mask=True,
                            force_inpaint=True,
                            wildcard="",
                            cycle=1,
                            inpaint_model=False,
                            noise_mask_feather=20,
                            image=detailerforeachdebug_145[0],
                            segs=impactsegsandmask_152[0],
                            model=applystablefast_158[0],
                            clip=checkpointloadersimple_241[1],
                            vae=checkpointloadersimple_241[2],
                            positive=cliptextencode_124[0],
                            negative=cliptextencode_243[0],
                            pipeline=True,
                        )
                        saveimage.save_images(
                            filename_prefix="LD-head",
                            images=hdr.apply_hdr2(detailerforeachdebug_145[0])
                            if autohdr
                            else detailerforeachdebug_145[0],
                        )
                else:
                    saveimage.save_images(
                        filename_prefix="LD-HF" if hires_fix else "LD",
                        images=hdr.apply_hdr2(vaedecode_240[0])
                        if autohdr
                        else vaedecode_240[0],
                    )
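

# CLI entry point. Example invocation (the script filename below is assumed
# here for illustration):
#   python pipeline.py "a majestic castle on a hill" 1024 1024 1 1 --hires-fix --adetailer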
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the LightDiffusion pipeline.")
    parser.add_argument("prompt", type=str, help="The prompt for the pipeline.")
    parser.add_argument("width", type=int, help="The width of the generated image.")
    parser.add_argument("height", type=int, help="The height of the generated image.")
    parser.add_argument("number", type=int, help="The number of images to generate.")
    parser.add_argument(
        "batch",
        type=int,
        help="The batch size, i.e. the number of images to generate at once.",
    )
    parser.add_argument(
        "--hires-fix", action="store_true", help="Enable high-resolution fix."
    )
    parser.add_argument(
        "--adetailer",
        action="store_true",
        help="Enable automatic face and body enhancing.",
    )
    parser.add_argument(
        "--enhance-prompt",
        action="store_true",
        help="Enable Ollama prompt enhancement. Make sure to have Ollama installed.",
    )
    parser.add_argument(
        "--img2img",
        action="store_true",
        help="Enable image-to-image mode. This will use the prompt as the path to the input image.",
    )
    parser.add_argument(
        "--stable-fast",
        action="store_true",
        help="Enable StableFast mode. This will compile the model for faster inference.",
    )
    parser.add_argument(
        "--reuse-seed",
        action="store_true",
        help="Reuse the last used seed for sampling; when omitted, a new random seed is drawn on every run.",
    )
    parser.add_argument(
        "--flux",
        action="store_true",
        help="Enable the Flux mode.",
    )
    parser.add_argument(
        "--prio-speed",
        action="store_true",
        help="Prioritize speed over quality.",
    )
    parser.add_argument(
        "--autohdr",
        action="store_true",
        help="Enable the AutoHDR mode.",
    )
    parser.add_argument(
        "--realistic-model",
        action="store_true",
        help="Use the realistic model.",
    )
    args = parser.parse_args()
    pipeline(
        args.prompt,
        args.width,
        args.height,
        args.number,
        args.batch,
        args.hires_fix,
        args.adetailer,
        args.enhance_prompt,
        args.img2img,
        args.stable_fast,
        args.reuse_seed,
        args.flux,
        args.prio_speed,
        args.autohdr,
        args.realistic_model,
    )