import argparse
import os
import random
import sys
import numpy as np
import torch
from PIL import Image
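# Make the repository root importable so the local "modules" package resolves.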
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from modules.AutoDetailer import SAM, SEGS, ADetailer, bbox
from modules.AutoEncoders import VariationalAE
from modules.clip import Clip
from modules.FileManaging import Downloader, ImageSaver, Loader
from modules.hidiffusion import msw_msa_attention
from modules.Model import LoRas
from modules.Quantize import Quantizer
from modules.sample import sampling
from modules.UltimateSDUpscale import UltimateSDUpscale, USDU_upscaler
from modules.Utilities import Enhancer, Latent, upscale
from modules.WaveSpeed import fbcache_nodes
from modules.AutoHDR import ahdr
SEED_PATH = os.path.join("./_internal/", "last_seed.txt")
try:
    with open(SEED_PATH, "r") as f:
        last_seed = int(f.read())
except (FileNotFoundError, ValueError):
    # No valid saved seed yet (e.g. first run); start from a random one.
    last_seed = random.randint(1, 2**64)
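# Fetch any missing model weights before running.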
Downloader.CheckAndDownload()
def pipeline(
prompt: str,
w: int,
h: int,
number: int = 1,
batch: int = 1,
hires_fix: bool = False,
adetailer: bool = False,
enhance_prompt: bool = False,
img2img: bool = False,
stable_fast: bool = False,
reuse_seed: bool = False,
flux_enabled: bool = False,
prio_speed: bool = False,
autohdr: bool = False,
realistic_model: bool = False,
) -> None:
"""#### Run the LightDiffusion pipeline.
#### Args:
- `prompt` (str): The prompt for the pipeline.
- `w` (int): The width of the generated image.
- `h` (int): The height of the generated image.
- `hires_fix` (bool, optional): Enable high-resolution fix. Defaults to False.
- `adetailer` (bool, optional): Enable automatic face and body enhancing. Defaults to False.
- `enhance_prompt` (bool, optional): Enable Ollama prompt enhancement. Defaults to False.
- `img2img` (bool, optional): Use LightDiffusion in Image to Image mode, the prompt input becomes the path to the input image. Defaults to False.
- `stable_fast` (bool, optional): Enable Stable-Fast speedup offering a 70% speed improvement in return of a compilation time. Defaults to False.
- `reuse_seed` (bool, optional): Reuse the last used seed, if False the seed will be kept random. Default to False.
- `flux_enabled` (bool, optional): Enable the flux mode. Defaults to False.
- `prio_speed` (bool, optional): Prioritize speed over quality. Defaults to False.
- `autohdr` (bool, optional): Enable the AutoHDR mode. Defaults to False.
- `realistic_model` (bool, optional): Use the realistic model. Defaults to False.
"""
global last_seed
if reuse_seed:
seed = last_seed
else:
seed = random.randint(1, 2**64)
last_seed = seed
        with open(SEED_PATH, "w") as f:
            f.write(str(seed))
    if enhance_prompt:
        try:
            prompt = Enhancer.enhance_prompt(prompt)
        except Exception:
            # Prompt enhancement is best-effort; fall back to the raw prompt.
            pass
    sampler_name = "dpmpp_2m_cfgpp" if prio_speed else "dpmpp_sde_cfgpp"
    ckpt = (
        "./_internal/checkpoints/DreamShaper_8_pruned.safetensors"
        if realistic_model
        else "./_internal/checkpoints/Meina V10 - baked VAE.safetensors"
    )
with torch.inference_mode():
        # Shared nodes, needed by every branch (the Flux path uses them too).
        vaedecode = VariationalAE.VAEDecode()
        saveimage = ImageSaver.SaveImage()
        hdr = ahdr.HDREffects()
        if not flux_enabled:
            checkpointloadersimple = Loader.CheckpointLoaderSimple()
            checkpointloadersimple_241 = checkpointloadersimple.load_checkpoint(
                ckpt_name=ckpt
            )
            hidiffoptimizer = msw_msa_attention.ApplyMSWMSAAttentionSimple()
            cliptextencode = Clip.CLIPTextEncode()
            emptylatentimage = Latent.EmptyLatentImage()
            ksampler_instance = sampling.KSampler()
            latent_upscale = upscale.LatentUpscale()
for _ in range(number):
if img2img:
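                # img2img: the prompt is a file path; load it as a [1, H, W, C] float tensor in [0, 1].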
img = Image.open(prompt)
img_array = np.array(img)
img_tensor = torch.from_numpy(img_array).float().to("cpu") / 255.0
img_tensor = img_tensor.unsqueeze(0)
with torch.inference_mode():
ultimatesdupscale = UltimateSDUpscale.UltimateSDUpscale()
try:
loraloader = LoRas.LoraLoader()
loraloader_274 = loraloader.load_lora(
lora_name="add_detail.safetensors",
strength_model=2,
strength_clip=2,
model=checkpointloadersimple_241[0],
clip=checkpointloadersimple_241[1],
)
                    except Exception:
                        # LoRA missing or failed to load; use the base checkpoint.
                        loraloader_274 = checkpointloadersimple_241
                    if stable_fast:
from modules.StableFast import StableFast
applystablefast = StableFast.ApplyStableFastUnet()
applystablefast_158 = applystablefast.apply_stable_fast(
enable_cuda_graph=False,
model=loraloader_274[0],
)
else:
applystablefast_158 = loraloader_274
clipsetlastlayer = Clip.CLIPSetLastLayer()
clipsetlastlayer_257 = clipsetlastlayer.set_last_layer(
stop_at_clip_layer=-2, clip=loraloader_274[1]
)
cliptextencode_242 = cliptextencode.encode(
text=prompt,
clip=clipsetlastlayer_257[0],
)
cliptextencode_243 = cliptextencode.encode(
text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)",
clip=clipsetlastlayer_257[0],
)
upscalemodelloader = USDU_upscaler.UpscaleModelLoader()
upscalemodelloader_244 = upscalemodelloader.load_model(
"RealESRGAN_x4plus.pth"
)
ultimatesdupscale_250 = ultimatesdupscale.upscale(
upscale_by=2,
seed=random.randint(1, 2**64),
steps=8,
cfg=6,
sampler_name=sampler_name,
scheduler="karras",
denoise=0.3,
mode_type="Linear",
tile_width=512,
tile_height=512,
mask_blur=16,
tile_padding=32,
seam_fix_mode="Half Tile",
seam_fix_denoise=0.2,
seam_fix_width=64,
seam_fix_mask_blur=16,
seam_fix_padding=32,
force_uniform_tiles="enable",
image=img_tensor,
model=applystablefast_158[0],
positive=cliptextencode_242[0],
negative=cliptextencode_243[0],
vae=checkpointloadersimple_241[2],
upscale_model=upscalemodelloader_244[0],
pipeline=True,
)
saveimage.save_images(
filename_prefix="LD-I2I",
images=hdr.apply_hdr2(ultimatesdupscale_250[0])
if autohdr
else ultimatesdupscale_250[0],
)
elif flux_enabled:
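                # Flux: GGUF-quantized UNet with dual CLIP-L/T5-XXL text encoders.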
Downloader.CheckAndDownloadFlux()
with torch.inference_mode():
dualcliploadergguf = Quantizer.DualCLIPLoaderGGUF()
emptylatentimage = Latent.EmptyLatentImage()
vaeloader = VariationalAE.VAELoader()
unetloadergguf = Quantizer.UnetLoaderGGUF()
cliptextencodeflux = Quantizer.CLIPTextEncodeFlux()
conditioningzeroout = Quantizer.ConditioningZeroOut()
ksampler = sampling.KSampler()
unetloadergguf_10 = unetloadergguf.load_unet(
unet_name="flux1-dev-Q8_0.gguf"
)
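                    # First-block cache (WaveSpeed): reuse the previous output when the first block's residual barely changes between steps.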
fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
unetloadergguf_10 = fb_cache.patch(
unetloadergguf_10, "diffusion_model", 0.120
)
vaeloader_11 = vaeloader.load_vae(vae_name="ae.safetensors")
dualcliploadergguf_19 = dualcliploadergguf.load_clip(
clip_name1="clip_l.safetensors",
clip_name2="t5-v1_1-xxl-encoder-Q8_0.gguf",
type="flux",
)
emptylatentimage_5 = emptylatentimage.generate(
width=w, height=h, batch_size=batch
)
cliptextencodeflux_15 = cliptextencodeflux.encode(
clip_l=prompt,
t5xxl=prompt,
guidance=3.0,
clip=dualcliploadergguf_19[0],
flux_enabled=True,
)
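                    # Flux is guidance-distilled (cfg=1), so a zeroed-out conditioning stands in for the negative prompt.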
conditioningzeroout_16 = conditioningzeroout.zero_out(
conditioning=cliptextencodeflux_15[0]
)
ksampler_3 = ksampler.sample(
seed=random.randint(1, 2**64),
steps=20,
cfg=1,
sampler_name="euler_cfgpp",
scheduler="beta",
denoise=1,
model=unetloadergguf_10[0],
positive=cliptextencodeflux_15[0],
negative=conditioningzeroout_16[0],
latent_image=emptylatentimage_5[0],
pipeline=True,
flux=True,
)
vaedecode_8 = vaedecode.decode(
samples=ksampler_3[0],
vae=vaeloader_11[0],
flux=True,
)
saveimage.save_images(
filename_prefix="LD-Flux",
images=hdr.apply_hdr2(vaedecode_8[0])
if autohdr
else vaedecode_8[0],
)
else:
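                # txt2img: standard SD1.5 sampling, optionally followed by a hires fix.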
                if prompt is None:
                    continue
with torch.inference_mode():
                    try:
                        loraloader = LoRas.LoraLoader()
                        loraloader_274 = loraloader.load_lora(
                            lora_name="add_detail.safetensors",
                            strength_model=0.7,
                            strength_clip=0.7,
                            model=checkpointloadersimple_241[0],
                            clip=checkpointloadersimple_241[1],
                        )
                        print("loading add_detail.safetensors")
                    except Exception:
                        # LoRA missing or failed to load; use the base checkpoint.
                        loraloader_274 = checkpointloadersimple_241
clipsetlastlayer = Clip.CLIPSetLastLayer()
clipsetlastlayer_257 = clipsetlastlayer.set_last_layer(
stop_at_clip_layer=-2, clip=loraloader_274[1]
)
cliptextencode_242 = cliptextencode.encode(
text=prompt,
clip=clipsetlastlayer_257[0],
)
cliptextencode_243 = cliptextencode.encode(
text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)",
clip=clipsetlastlayer_257[0],
)
emptylatentimage_244 = emptylatentimage.generate(
width=w, height=h, batch_size=batch
)
                    if stable_fast:
from modules.StableFast import StableFast
applystablefast = StableFast.ApplyStableFastUnet()
applystablefast_158 = applystablefast.apply_stable_fast(
enable_cuda_graph=False,
model=loraloader_274[0],
)
else:
applystablefast_158 = loraloader_274
# fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
# applystablefast_158 = fb_cache.patch(
# applystablefast_158, "diffusion_model", 0.120
# )
ksampler_239 = ksampler_instance.sample(
seed=seed,
steps=20,
cfg=7,
sampler_name=sampler_name,
scheduler="karras",
denoise=1,
pipeline=True,
model=hidiffoptimizer.go(
model_type="auto", model=applystablefast_158[0]
)[0],
positive=cliptextencode_242[0],
negative=cliptextencode_243[0],
latent_image=emptylatentimage_244[0],
)
if hires_fix:
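                        # Hires fix: 2x latent upscale, then a short low-denoise second pass.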
latentupscale_254 = latent_upscale.upscale(
width=w * 2,
height=h * 2,
samples=ksampler_239[0],
)
ksampler_253 = ksampler_instance.sample(
seed=random.randint(1, 2**64),
steps=10,
cfg=8,
sampler_name="euler_ancestral_cfgpp",
scheduler="normal",
denoise=0.45,
model=hidiffoptimizer.go(
model_type="auto", model=applystablefast_158[0]
)[0],
positive=cliptextencode_242[0],
negative=cliptextencode_243[0],
latent_image=latentupscale_254[0],
pipeline=True,
)
else:
ksampler_253 = ksampler_239
vaedecode_240 = vaedecode.decode(
samples=ksampler_253[0],
vae=checkpointloadersimple_241[2],
)
if adetailer:
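                    # ADetailer: YOLO detection + SAM masks + per-region inpainting, body first, then face.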
with torch.inference_mode():
samloader = SAM.SAMLoader()
samloader_87 = samloader.load_model(
model_name="sam_vit_b_01ec64.pth", device_mode="AUTO"
)
cliptextencode_124 = cliptextencode.encode(
text="royal, detailed, magnificient, beautiful, seducing",
clip=loraloader_274[1],
)
ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
# model_name="face_yolov8m.pt"
model_name="person_yolov8m-seg.pt"
)
bboxdetectorsegs = bbox.BboxDetectorForEach()
samdetectorcombined = SAM.SAMDetectorCombined()
impactsegsandmask = SEGS.SegsBitwiseAndMask()
detailerforeachdebug = ADetailer.DetailerForEachTest()
bboxdetectorsegs_132 = bboxdetectorsegs.doit(
threshold=0.5,
dilation=10,
crop_factor=2,
drop_size=10,
labels="all",
bbox_detector=ultralyticsdetectorprovider_151[0],
image=vaedecode_240[0],
)
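                        # Refine the YOLO boxes into precise masks with SAM.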
samdetectorcombined_139 = samdetectorcombined.doit(
detection_hint="center-1",
dilation=0,
threshold=0.93,
bbox_expansion=0,
mask_hint_threshold=0.7,
mask_hint_use_negative="False",
sam_model=samloader_87[0],
segs=bboxdetectorsegs_132,
image=vaedecode_240[0],
)
if samdetectorcombined_139 is None:
return
impactsegsandmask_152 = impactsegsandmask.doit(
segs=bboxdetectorsegs_132,
mask=samdetectorcombined_139[0],
)
detailerforeachdebug_145 = detailerforeachdebug.doit(
guide_size=512,
guide_size_for=False,
max_size=768,
seed=random.randint(1, 2**64),
steps=20,
cfg=6.5,
sampler_name=sampler_name,
scheduler="karras",
denoise=0.5,
feather=5,
noise_mask=True,
force_inpaint=True,
wildcard="",
cycle=1,
inpaint_model=False,
noise_mask_feather=20,
image=vaedecode_240[0],
segs=impactsegsandmask_152[0],
model=applystablefast_158[0],
clip=checkpointloadersimple_241[1],
vae=checkpointloadersimple_241[2],
positive=cliptextencode_124[0],
negative=cliptextencode_243[0],
pipeline=True,
)
saveimage.save_images(
filename_prefix="LD-body",
images=hdr.apply_hdr2(detailerforeachdebug_145[0])
if autohdr
else detailerforeachdebug_145[0],
)
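                        # Second pass: detect and refine faces on the body-enhanced image.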
ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
model_name="face_yolov9c.pt"
)
bboxdetectorsegs_132 = bboxdetectorsegs.doit(
threshold=0.5,
dilation=10,
crop_factor=2,
drop_size=10,
labels="all",
bbox_detector=ultralyticsdetectorprovider_151[0],
image=detailerforeachdebug_145[0],
)
samdetectorcombined_139 = samdetectorcombined.doit(
detection_hint="center-1",
dilation=0,
threshold=0.93,
bbox_expansion=0,
mask_hint_threshold=0.7,
mask_hint_use_negative="False",
sam_model=samloader_87[0],
segs=bboxdetectorsegs_132,
image=detailerforeachdebug_145[0],
)
impactsegsandmask_152 = impactsegsandmask.doit(
segs=bboxdetectorsegs_132,
mask=samdetectorcombined_139[0],
)
detailerforeachdebug_145 = detailerforeachdebug.doit(
guide_size=512,
guide_size_for=False,
max_size=768,
seed=random.randint(1, 2**64),
steps=20,
cfg=6.5,
sampler_name=sampler_name,
scheduler="karras",
denoise=0.5,
feather=5,
noise_mask=True,
force_inpaint=True,
wildcard="",
cycle=1,
inpaint_model=False,
noise_mask_feather=20,
image=detailerforeachdebug_145[0],
segs=impactsegsandmask_152[0],
model=applystablefast_158[0],
clip=checkpointloadersimple_241[1],
vae=checkpointloadersimple_241[2],
positive=cliptextencode_124[0],
negative=cliptextencode_243[0],
pipeline=True,
)
saveimage.save_images(
filename_prefix="LD-head",
images=hdr.apply_hdr2(detailerforeachdebug_145[0])
if autohdr
else detailerforeachdebug_145[0],
)
else:
saveimage.save_images(
filename_prefix="LD-HF" if hires_fix else "LD",
images=hdr.apply_hdr2(vaedecode_240[0])
if autohdr
else vaedecode_240[0],
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the LightDiffusion pipeline.")
parser.add_argument("prompt", type=str, help="The prompt for the pipeline.")
parser.add_argument("width", type=int, help="The width of the generated image.")
parser.add_argument("height", type=int, help="The height of the generated image.")
parser.add_argument("number", type=int, help="The number of images to generate.")
parser.add_argument(
"batch",
type=int,
help="The batch size. aka the number of images to generate at once.",
)
parser.add_argument(
"--hires-fix", action="store_true", help="Enable high-resolution fix."
)
parser.add_argument(
"--adetailer",
action="store_true",
help="Enable automatic face and body enhancin.g",
)
parser.add_argument(
"--enhance-prompt",
action="store_true",
help="Enable Ollama prompt enhancement. Make sure to have ollama with Ollama installed.",
)
parser.add_argument(
"--img2img",
action="store_true",
help="Enable image-to-image mode. This will use the prompt as path to the image.",
)
parser.add_argument(
"--stable-fast",
action="store_true",
help="Enable StableFast mode. This will compile the model for faster inference.",
)
parser.add_argument(
"--reuse-seed",
action="store_true",
help="Enable to reuse last used seed for sampling, default for False is a random seed at every use.",
)
parser.add_argument(
"--flux",
action="store_true",
help="Enable the flux mode.",
)
parser.add_argument(
"--prio-speed",
action="store_true",
help="Prioritize speed over quality.",
)
parser.add_argument(
"--autohdr",
action="store_true",
help="Enable the AutoHDR mode.",
)
parser.add_argument(
"--realistic-model",
action="store_true",
help="Use the realistic model.",
)
args = parser.parse_args()
pipeline(
args.prompt,
args.width,
args.height,
args.number,
args.batch,
args.hires_fix,
args.adetailer,
args.enhance_prompt,
args.img2img,
args.stable_fast,
args.reuse_seed,
args.flux,
args.prio_speed,
args.autohdr,
args.realistic_model,
)