import argparse
import os
import random
import sys

import numpy as np
import torch
from PIL import Image

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))

from modules.AutoDetailer import SAM, SEGS, ADetailer, bbox
from modules.AutoEncoders import VariationalAE
from modules.clip import Clip
from modules.FileManaging import Downloader, ImageSaver, Loader
from modules.hidiffusion import msw_msa_attention
from modules.Model import LoRas
from modules.Quantize import Quantizer
from modules.sample import sampling
from modules.UltimateSDUpscale import UltimateSDUpscale, USDU_upscaler
from modules.Utilities import Enhancer, Latent, upscale
from modules.WaveSpeed import fbcache_nodes
from modules.AutoHDR import ahdr

# Restore the seed from the previous run so --reuse-seed can reproduce it.
with open(os.path.join("./_internal/", "last_seed.txt"), "r") as f:
    last_seed = int(f.read())

Downloader.CheckAndDownload()


def pipeline(
    prompt: str,
    w: int,
    h: int,
    number: int = 1,
    batch: int = 1,
    hires_fix: bool = False,
    adetailer: bool = False,
    enhance_prompt: bool = False,
    img2img: bool = False,
    stable_fast: bool = False,
    reuse_seed: bool = False,
    flux_enabled: bool = False,
    prio_speed: bool = False,
    autohdr: bool = False,
    realistic_model: bool = False,
) -> None:
    """#### Run the LightDiffusion pipeline.

    #### Args:
        - `prompt` (str): The prompt for the pipeline.
        - `w` (int): The width of the generated image.
        - `h` (int): The height of the generated image.
        - `number` (int, optional): The number of images to generate. Defaults to 1.
        - `batch` (int, optional): The number of images to generate at once. Defaults to 1.
        - `hires_fix` (bool, optional): Enable the high-resolution fix. Defaults to False.
        - `adetailer` (bool, optional): Enable automatic face and body enhancement. Defaults to False.
        - `enhance_prompt` (bool, optional): Enable Ollama prompt enhancement. Defaults to False.
        - `img2img` (bool, optional): Use LightDiffusion in image-to-image mode; the prompt input becomes the path to the input image. Defaults to False.
        - `stable_fast` (bool, optional): Enable the Stable-Fast speedup, offering roughly a 70% speed improvement in exchange for a one-time compilation cost. Defaults to False.
        - `reuse_seed` (bool, optional): Reuse the last used seed; if False, a random seed is drawn for each run. Defaults to False.
        - `flux_enabled` (bool, optional): Enable the Flux mode. Defaults to False.
        - `prio_speed` (bool, optional): Prioritize speed over quality. Defaults to False.
        - `autohdr` (bool, optional): Enable the AutoHDR mode. Defaults to False.
        - `realistic_model` (bool, optional): Use the realistic model. Defaults to False.
""" global last_seed if reuse_seed: seed = last_seed else: seed = random.randint(1, 2**64) last_seed = seed with open(os.path.join("./_internal/", "last_seed.txt"), "w") as f: f.write(str(seed)) if enhance_prompt: try: prompt = Enhancer.enhance_prompt(prompt) except: pass sampler_name = "dpmpp_sde_cfgpp" if not prio_speed else "dpmpp_2m_cfgpp" ckpt = ( "./_internal/checkpoints/Meina V10 - baked VAE.safetensors" if not realistic_model else "./_internal/checkpoints/DreamShaper_8_pruned.safetensors" ) with torch.inference_mode(): if not flux_enabled: checkpointloadersimple = Loader.CheckpointLoaderSimple() checkpointloadersimple_241 = checkpointloadersimple.load_checkpoint( ckpt_name=ckpt ) hidiffoptimizer = msw_msa_attention.ApplyMSWMSAAttentionSimple() cliptextencode = Clip.CLIPTextEncode() emptylatentimage = Latent.EmptyLatentImage() ksampler_instance = sampling.KSampler() vaedecode = VariationalAE.VAEDecode() saveimage = ImageSaver.SaveImage() latent_upscale = upscale.LatentUpscale() hdr = ahdr.HDREffects() for _ in range(number): if img2img: img = Image.open(prompt) img_array = np.array(img) img_tensor = torch.from_numpy(img_array).float().to("cpu") / 255.0 img_tensor = img_tensor.unsqueeze(0) with torch.inference_mode(): ultimatesdupscale = UltimateSDUpscale.UltimateSDUpscale() try: loraloader = LoRas.LoraLoader() loraloader_274 = loraloader.load_lora( lora_name="add_detail.safetensors", strength_model=2, strength_clip=2, model=checkpointloadersimple_241[0], clip=checkpointloadersimple_241[1], ) except: loraloader_274 = checkpointloadersimple_241 if stable_fast is True: from modules.StableFast import StableFast applystablefast = StableFast.ApplyStableFastUnet() applystablefast_158 = applystablefast.apply_stable_fast( enable_cuda_graph=False, model=loraloader_274[0], ) else: applystablefast_158 = loraloader_274 clipsetlastlayer = Clip.CLIPSetLastLayer() clipsetlastlayer_257 = clipsetlastlayer.set_last_layer( stop_at_clip_layer=-2, clip=loraloader_274[1] ) cliptextencode_242 = cliptextencode.encode( text=prompt, clip=clipsetlastlayer_257[0], ) cliptextencode_243 = cliptextencode.encode( text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)", clip=clipsetlastlayer_257[0], ) upscalemodelloader = USDU_upscaler.UpscaleModelLoader() upscalemodelloader_244 = upscalemodelloader.load_model( "RealESRGAN_x4plus.pth" ) ultimatesdupscale_250 = ultimatesdupscale.upscale( upscale_by=2, seed=random.randint(1, 2**64), steps=8, cfg=6, sampler_name=sampler_name, scheduler="karras", denoise=0.3, mode_type="Linear", tile_width=512, tile_height=512, mask_blur=16, tile_padding=32, seam_fix_mode="Half Tile", seam_fix_denoise=0.2, seam_fix_width=64, seam_fix_mask_blur=16, seam_fix_padding=32, force_uniform_tiles="enable", image=img_tensor, model=applystablefast_158[0], positive=cliptextencode_242[0], negative=cliptextencode_243[0], vae=checkpointloadersimple_241[2], upscale_model=upscalemodelloader_244[0], pipeline=True, ) saveimage.save_images( filename_prefix="LD-I2I", images=hdr.apply_hdr2(ultimatesdupscale_250[0]) if autohdr else ultimatesdupscale_250[0], ) elif flux_enabled: Downloader.CheckAndDownloadFlux() with torch.inference_mode(): dualcliploadergguf = Quantizer.DualCLIPLoaderGGUF() emptylatentimage = Latent.EmptyLatentImage() vaeloader = VariationalAE.VAELoader() unetloadergguf = Quantizer.UnetLoaderGGUF() cliptextencodeflux = Quantizer.CLIPTextEncodeFlux() 
                conditioningzeroout = Quantizer.ConditioningZeroOut()
                ksampler = sampling.KSampler()
                unetloadergguf_10 = unetloadergguf.load_unet(
                    unet_name="flux1-dev-Q8_0.gguf"
                )
                fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
                unetloadergguf_10 = fb_cache.patch(
                    unetloadergguf_10, "diffusion_model", 0.120
                )
                vaeloader_11 = vaeloader.load_vae(vae_name="ae.safetensors")
                dualcliploadergguf_19 = dualcliploadergguf.load_clip(
                    clip_name1="clip_l.safetensors",
                    clip_name2="t5-v1_1-xxl-encoder-Q8_0.gguf",
                    type="flux",
                )
                emptylatentimage_5 = emptylatentimage.generate(
                    width=w, height=h, batch_size=batch
                )
                cliptextencodeflux_15 = cliptextencodeflux.encode(
                    clip_l=prompt,
                    t5xxl=prompt,
                    guidance=3.0,
                    clip=dualcliploadergguf_19[0],
                    flux_enabled=True,
                )
                conditioningzeroout_16 = conditioningzeroout.zero_out(
                    conditioning=cliptextencodeflux_15[0]
                )
                ksampler_3 = ksampler.sample(
                    seed=random.randint(1, 2**64),
                    steps=20,
                    cfg=1,
                    sampler_name="euler_cfgpp",
                    scheduler="beta",
                    denoise=1,
                    model=unetloadergguf_10[0],
                    positive=cliptextencodeflux_15[0],
                    negative=conditioningzeroout_16[0],
                    latent_image=emptylatentimage_5[0],
                    pipeline=True,
                    flux=True,
                )
                vaedecode_8 = vaedecode.decode(
                    samples=ksampler_3[0],
                    vae=vaeloader_11[0],
                    flux=True,
                )
                saveimage.save_images(
                    filename_prefix="LD-Flux",
                    images=hdr.apply_hdr2(vaedecode_8[0])
                    if autohdr
                    else vaedecode_8[0],
                )
        else:
            # Text-to-image.
            while prompt is None:
                pass
            with torch.inference_mode():
                try:
                    loraloader = LoRas.LoraLoader()
                    loraloader_274 = loraloader.load_lora(
                        lora_name="add_detail.safetensors",
                        strength_model=0.7,
                        strength_clip=0.7,
                        model=checkpointloadersimple_241[0],
                        clip=checkpointloadersimple_241[1],
                    )
                    print("loading add_detail.safetensors")
                except Exception:
                    loraloader_274 = checkpointloadersimple_241
                clipsetlastlayer = Clip.CLIPSetLastLayer()
                clipsetlastlayer_257 = clipsetlastlayer.set_last_layer(
                    stop_at_clip_layer=-2, clip=loraloader_274[1]
                )
                applystablefast_158 = loraloader_274
                cliptextencode_242 = cliptextencode.encode(
                    text=prompt,
                    clip=clipsetlastlayer_257[0],
                )
                cliptextencode_243 = cliptextencode.encode(
                    text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)",
                    clip=clipsetlastlayer_257[0],
                )
                emptylatentimage_244 = emptylatentimage.generate(
                    width=w, height=h, batch_size=batch
                )
                if stable_fast:
                    from modules.StableFast import StableFast

                    applystablefast = StableFast.ApplyStableFastUnet()
                    applystablefast_158 = applystablefast.apply_stable_fast(
                        enable_cuda_graph=False,
                        model=loraloader_274[0],
                    )
                else:
                    applystablefast_158 = loraloader_274
                # fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
                # applystablefast_158 = fb_cache.patch(
                #     applystablefast_158, "diffusion_model", 0.120
                # )
                ksampler_239 = ksampler_instance.sample(
                    seed=seed,
                    steps=20,
                    cfg=7,
                    sampler_name=sampler_name,
                    scheduler="karras",
                    denoise=1,
                    pipeline=True,
                    model=hidiffoptimizer.go(
                        model_type="auto", model=applystablefast_158[0]
                    )[0],
                    positive=cliptextencode_242[0],
                    negative=cliptextencode_243[0],
                    latent_image=emptylatentimage_244[0],
                )
                if hires_fix:
                    # Upscale the latent, then refine it with a second, lighter pass.
                    latentupscale_254 = latent_upscale.upscale(
                        width=w * 2,
                        height=h * 2,
                        samples=ksampler_239[0],
                    )
                    ksampler_253 = ksampler_instance.sample(
                        seed=random.randint(1, 2**64),
                        steps=10,
                        cfg=8,
                        sampler_name="euler_ancestral_cfgpp",
                        scheduler="normal",
                        denoise=0.45,
                        model=hidiffoptimizer.go(
                            model_type="auto", model=applystablefast_158[0]
                        )[0],
                        positive=cliptextencode_242[0],
                        negative=cliptextencode_243[0],
                        latent_image=latentupscale_254[0],
                        pipeline=True,
                    )
                else:
                    ksampler_253 = ksampler_239
                vaedecode_240 = vaedecode.decode(
                    samples=ksampler_253[0],
                    vae=checkpointloadersimple_241[2],
                )
            if adetailer:
                # ADetailer: two refinement passes, body first, then face.
                with torch.inference_mode():
                    samloader = SAM.SAMLoader()
                    samloader_87 = samloader.load_model(
                        model_name="sam_vit_b_01ec64.pth", device_mode="AUTO"
                    )
                    cliptextencode_124 = cliptextencode.encode(
                        text="royal, detailed, magnificent, beautiful, seducing",
                        clip=loraloader_274[1],
                    )
                    ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
                    ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
                        # model_name="face_yolov8m.pt"
                        model_name="person_yolov8m-seg.pt"
                    )
                    bboxdetectorsegs = bbox.BboxDetectorForEach()
                    samdetectorcombined = SAM.SAMDetectorCombined()
                    impactsegsandmask = SEGS.SegsBitwiseAndMask()
                    detailerforeachdebug = ADetailer.DetailerForEachTest()
                    bboxdetectorsegs_132 = bboxdetectorsegs.doit(
                        threshold=0.5,
                        dilation=10,
                        crop_factor=2,
                        drop_size=10,
                        labels="all",
                        bbox_detector=ultralyticsdetectorprovider_151[0],
                        image=vaedecode_240[0],
                    )
                    samdetectorcombined_139 = samdetectorcombined.doit(
                        detection_hint="center-1",
                        dilation=0,
                        threshold=0.93,
                        bbox_expansion=0,
                        mask_hint_threshold=0.7,
                        mask_hint_use_negative="False",
                        sam_model=samloader_87[0],
                        segs=bboxdetectorsegs_132,
                        image=vaedecode_240[0],
                    )
                    if samdetectorcombined_139 is None:
                        return
                    impactsegsandmask_152 = impactsegsandmask.doit(
                        segs=bboxdetectorsegs_132,
                        mask=samdetectorcombined_139[0],
                    )
                    detailerforeachdebug_145 = detailerforeachdebug.doit(
                        guide_size=512,
                        guide_size_for=False,
                        max_size=768,
                        seed=random.randint(1, 2**64),
                        steps=20,
                        cfg=6.5,
                        sampler_name=sampler_name,
                        scheduler="karras",
                        denoise=0.5,
                        feather=5,
                        noise_mask=True,
                        force_inpaint=True,
                        wildcard="",
                        cycle=1,
                        inpaint_model=False,
                        noise_mask_feather=20,
                        image=vaedecode_240[0],
                        segs=impactsegsandmask_152[0],
                        model=applystablefast_158[0],
                        clip=checkpointloadersimple_241[1],
                        vae=checkpointloadersimple_241[2],
                        positive=cliptextencode_124[0],
                        negative=cliptextencode_243[0],
                        pipeline=True,
                    )
                    saveimage.save_images(
                        filename_prefix="LD-body",
                        images=hdr.apply_hdr2(detailerforeachdebug_145[0])
                        if autohdr
                        else detailerforeachdebug_145[0],
                    )
                    # Second pass: detect and refine faces on the body-refined image.
                    ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
                    ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
                        model_name="face_yolov9c.pt"
                    )
                    bboxdetectorsegs_132 = bboxdetectorsegs.doit(
                        threshold=0.5,
                        dilation=10,
                        crop_factor=2,
                        drop_size=10,
                        labels="all",
                        bbox_detector=ultralyticsdetectorprovider_151[0],
                        image=detailerforeachdebug_145[0],
                    )
                    samdetectorcombined_139 = samdetectorcombined.doit(
                        detection_hint="center-1",
                        dilation=0,
                        threshold=0.93,
                        bbox_expansion=0,
                        mask_hint_threshold=0.7,
                        mask_hint_use_negative="False",
                        sam_model=samloader_87[0],
                        segs=bboxdetectorsegs_132,
                        image=detailerforeachdebug_145[0],
                    )
                    impactsegsandmask_152 = impactsegsandmask.doit(
                        segs=bboxdetectorsegs_132,
                        mask=samdetectorcombined_139[0],
                    )
                    detailerforeachdebug_145 = detailerforeachdebug.doit(
                        guide_size=512,
                        guide_size_for=False,
                        max_size=768,
                        seed=random.randint(1, 2**64),
                        steps=20,
                        cfg=6.5,
                        sampler_name=sampler_name,
                        scheduler="karras",
                        denoise=0.5,
                        feather=5,
                        noise_mask=True,
                        force_inpaint=True,
                        wildcard="",
                        cycle=1,
                        inpaint_model=False,
                        noise_mask_feather=20,
                        image=detailerforeachdebug_145[0],
                        segs=impactsegsandmask_152[0],
                        model=applystablefast_158[0],
                        clip=checkpointloadersimple_241[1],
                        vae=checkpointloadersimple_241[2],
                        positive=cliptextencode_124[0],
                        negative=cliptextencode_243[0],
                        pipeline=True,
                    )
                    saveimage.save_images(
                        filename_prefix="LD-head",
                        images=hdr.apply_hdr2(detailerforeachdebug_145[0])
                        if autohdr
                        else detailerforeachdebug_145[0],
                    )
            else:
                saveimage.save_images(
                    filename_prefix="LD-HF" if hires_fix else "LD",
                    images=hdr.apply_hdr2(vaedecode_240[0])
                    if autohdr
                    else vaedecode_240[0],
                )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the LightDiffusion pipeline.")
    parser.add_argument("prompt", type=str, help="The prompt for the pipeline.")
    parser.add_argument("width", type=int, help="The width of the generated image.")
    parser.add_argument("height", type=int, help="The height of the generated image.")
    parser.add_argument("number", type=int, help="The number of images to generate.")
    parser.add_argument(
        "batch",
        type=int,
        help="The batch size, i.e. the number of images to generate at once.",
    )
    parser.add_argument(
        "--hires-fix", action="store_true", help="Enable high-resolution fix."
    )
    parser.add_argument(
        "--adetailer",
        action="store_true",
        help="Enable automatic face and body enhancing.",
    )
    parser.add_argument(
        "--enhance-prompt",
        action="store_true",
        help="Enable Ollama prompt enhancement. Make sure to have Ollama installed.",
    )
    parser.add_argument(
        "--img2img",
        action="store_true",
        help="Enable image-to-image mode. The prompt is used as the path to the input image.",
    )
    parser.add_argument(
        "--stable-fast",
        action="store_true",
        help="Enable StableFast mode. This will compile the model for faster inference.",
    )
    parser.add_argument(
        "--reuse-seed",
        action="store_true",
        help="Reuse the last used seed for sampling; if not set, a random seed is drawn on every use.",
    )
    parser.add_argument(
        "--flux",
        action="store_true",
        help="Enable the Flux mode.",
    )
    parser.add_argument(
        "--prio-speed",
        action="store_true",
        help="Prioritize speed over quality.",
    )
    parser.add_argument(
        "--autohdr",
        action="store_true",
        help="Enable the AutoHDR mode.",
    )
    parser.add_argument(
        "--realistic-model",
        action="store_true",
        help="Use the realistic model.",
    )
    args = parser.parse_args()

    pipeline(
        args.prompt,
        args.width,
        args.height,
        args.number,
        args.batch,
        args.hires_fix,
        args.adetailer,
        args.enhance_prompt,
        args.img2img,
        args.stable_fast,
        args.reuse_seed,
        args.flux,
        args.prio_speed,
        args.autohdr,
        args.realistic_model,
    )
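
# Example invocation, as a sketch: the script filename "pipeline.py" is an
# assumption (substitute this file's actual name), and the default checkpoints
# and models are expected under ./_internal/ (Downloader fetches missing ones).
# Positional arguments are: prompt, width, height, number, batch.
#
#   python pipeline.py "a misty forest at dawn" 768 512 1 1 --hires-fix --autohdr
#
# This generates one batch of one 768x512 image with the high-resolution fix
# and AutoHDR enabled, using a fresh random seed (add --reuse-seed to repeat
# the previous run's seed).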