import argparse
import os
import random
import sys

import numpy as np
import torch
from PIL import Image

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))

from modules.AutoDetailer import SAM, SEGS, ADetailer, bbox
from modules.AutoEncoders import VariationalAE
from modules.AutoHDR import ahdr
from modules.clip import Clip
from modules.FileManaging import Downloader, ImageSaver, Loader
from modules.hidiffusion import msw_msa_attention
from modules.Model import LoRas
from modules.Quantize import Quantizer
from modules.sample import sampling
from modules.UltimateSDUpscale import UltimateSDUpscale, USDU_upscaler
from modules.Utilities import Enhancer, Latent, upscale
from modules.WaveSpeed import fbcache_nodes
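
# Restore the last seed used and make sure the required model files are present.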
with open(os.path.join("./_internal/", "last_seed.txt"), "r") as f:
    last_seed = int(f.read())

Downloader.CheckAndDownload()


def pipeline(
    prompt: str,
    w: int,
    h: int,
    number: int = 1,
    batch: int = 1,
    hires_fix: bool = False,
    adetailer: bool = False,
    enhance_prompt: bool = False,
    img2img: bool = False,
    stable_fast: bool = False,
    reuse_seed: bool = False,
    flux_enabled: bool = False,
    prio_speed: bool = False,
    autohdr: bool = False,
    realistic_model: bool = False,
) -> None:
"""#### Run the LightDiffusion pipeline. | |
#### Args: | |
- `prompt` (str): The prompt for the pipeline. | |
- `w` (int): The width of the generated image. | |
- `h` (int): The height of the generated image. | |
- `hires_fix` (bool, optional): Enable high-resolution fix. Defaults to False. | |
- `adetailer` (bool, optional): Enable automatic face and body enhancing. Defaults to False. | |
- `enhance_prompt` (bool, optional): Enable Ollama prompt enhancement. Defaults to False. | |
- `img2img` (bool, optional): Use LightDiffusion in Image to Image mode, the prompt input becomes the path to the input image. Defaults to False. | |
- `stable_fast` (bool, optional): Enable Stable-Fast speedup offering a 70% speed improvement in return of a compilation time. Defaults to False. | |
- `reuse_seed` (bool, optional): Reuse the last used seed, if False the seed will be kept random. Default to False. | |
- `flux_enabled` (bool, optional): Enable the flux mode. Defaults to False. | |
- `prio_speed` (bool, optional): Prioritize speed over quality. Defaults to False. | |
- `autohdr` (bool, optional): Enable the AutoHDR mode. Defaults to False. | |
- `realistic_model` (bool, optional): Use the realistic model. Defaults to False. | |
""" | |
    global last_seed
    if reuse_seed:
        seed = last_seed
    else:
        seed = random.randint(1, 2**64)
        last_seed = seed
        with open(os.path.join("./_internal/", "last_seed.txt"), "w") as f:
            f.write(str(seed))
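    # Optionally rewrite the prompt with the Ollama-based enhancer.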
    if enhance_prompt:
        try:
            prompt = Enhancer.enhance_prompt(prompt)
        except Exception:
            # Fall back to the original prompt if enhancement fails.
            pass
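    # Choose a faster sampler when speed is prioritized, and pick the checkpoint.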
    sampler_name = "dpmpp_2m_cfgpp" if prio_speed else "dpmpp_sde_cfgpp"
    ckpt = (
        "./_internal/checkpoints/Meina V10 - baked VAE.safetensors"
        if not realistic_model
        else "./_internal/checkpoints/DreamShaper_8_pruned.safetensors"
    )
    with torch.inference_mode():
        # The SD checkpoint is only needed outside of Flux mode; the helper
        # nodes below are shared by every branch.
        if not flux_enabled:
            checkpointloadersimple = Loader.CheckpointLoaderSimple()
            checkpointloadersimple_241 = checkpointloadersimple.load_checkpoint(
                ckpt_name=ckpt
            )
        hidiffoptimizer = msw_msa_attention.ApplyMSWMSAAttentionSimple()
        cliptextencode = Clip.CLIPTextEncode()
        emptylatentimage = Latent.EmptyLatentImage()
        ksampler_instance = sampling.KSampler()
        vaedecode = VariationalAE.VAEDecode()
        saveimage = ImageSaver.SaveImage()
        latent_upscale = upscale.LatentUpscale()
        hdr = ahdr.HDREffects()
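        # Run the pipeline `number` times, generating `batch` images per run.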
        for _ in range(number):
            if img2img:
                # Image-to-image: `prompt` is the path to the input image,
                # normalized to a [0, 1] float tensor.
                img = Image.open(prompt)
                img_array = np.array(img)
                img_tensor = torch.from_numpy(img_array).float().to("cpu") / 255.0
                img_tensor = img_tensor.unsqueeze(0)
                with torch.inference_mode():
                    ultimatesdupscale = UltimateSDUpscale.UltimateSDUpscale()
                    # Apply the add-detail LoRA when available; fall back to
                    # the bare checkpoint otherwise.
                    try:
                        loraloader = LoRas.LoraLoader()
                        loraloader_274 = loraloader.load_lora(
                            lora_name="add_detail.safetensors",
                            strength_model=2,
                            strength_clip=2,
                            model=checkpointloadersimple_241[0],
                            clip=checkpointloadersimple_241[1],
                        )
                    except Exception:
                        loraloader_274 = checkpointloadersimple_241
                    if stable_fast:
                        from modules.StableFast import StableFast

                        applystablefast = StableFast.ApplyStableFastUnet()
                        applystablefast_158 = applystablefast.apply_stable_fast(
                            enable_cuda_graph=False,
                            model=loraloader_274[0],
                        )
                    else:
                        applystablefast_158 = loraloader_274
                    clipsetlastlayer = Clip.CLIPSetLastLayer()
                    clipsetlastlayer_257 = clipsetlastlayer.set_last_layer(
                        stop_at_clip_layer=-2, clip=loraloader_274[1]
                    )
                    cliptextencode_242 = cliptextencode.encode(
                        text=prompt,
                        clip=clipsetlastlayer_257[0],
                    )
                    cliptextencode_243 = cliptextencode.encode(
                        text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)",
                        clip=clipsetlastlayer_257[0],
                    )
                    upscalemodelloader = USDU_upscaler.UpscaleModelLoader()
                    upscalemodelloader_244 = upscalemodelloader.load_model(
                        "RealESRGAN_x4plus.pth"
                    )
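                    # Tiled upscale: RealESRGAN for the raw 2x enlargement, then
                    # a low-denoise (0.3) sampling pass over each tile to refine it.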
                    ultimatesdupscale_250 = ultimatesdupscale.upscale(
                        upscale_by=2,
                        seed=random.randint(1, 2**64),
                        steps=8,
                        cfg=6,
                        sampler_name=sampler_name,
                        scheduler="karras",
                        denoise=0.3,
                        mode_type="Linear",
                        tile_width=512,
                        tile_height=512,
                        mask_blur=16,
                        tile_padding=32,
                        seam_fix_mode="Half Tile",
                        seam_fix_denoise=0.2,
                        seam_fix_width=64,
                        seam_fix_mask_blur=16,
                        seam_fix_padding=32,
                        force_uniform_tiles="enable",
                        image=img_tensor,
                        model=applystablefast_158[0],
                        positive=cliptextencode_242[0],
                        negative=cliptextencode_243[0],
                        vae=checkpointloadersimple_241[2],
                        upscale_model=upscalemodelloader_244[0],
                        pipeline=True,
                    )
                    saveimage.save_images(
                        filename_prefix="LD-I2I",
                        images=hdr.apply_hdr2(ultimatesdupscale_250[0])
                        if autohdr
                        else ultimatesdupscale_250[0],
                    )
            elif flux_enabled:
                Downloader.CheckAndDownloadFlux()
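                # Flux path: GGUF-quantized UNet and T5 encoder keep VRAM usage low.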
                with torch.inference_mode():
                    dualcliploadergguf = Quantizer.DualCLIPLoaderGGUF()
                    emptylatentimage = Latent.EmptyLatentImage()
                    vaeloader = VariationalAE.VAELoader()
                    unetloadergguf = Quantizer.UnetLoaderGGUF()
                    cliptextencodeflux = Quantizer.CLIPTextEncodeFlux()
                    conditioningzeroout = Quantizer.ConditioningZeroOut()
                    ksampler = sampling.KSampler()
                    unetloadergguf_10 = unetloadergguf.load_unet(
                        unet_name="flux1-dev-Q8_0.gguf"
                    )
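                    # Patch the UNet with WaveSpeed's first-block cache to skip
                    # redundant computation across diffusion steps.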
                    fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
                    unetloadergguf_10 = fb_cache.patch(
                        unetloadergguf_10, "diffusion_model", 0.120
                    )
                    vaeloader_11 = vaeloader.load_vae(vae_name="ae.safetensors")
                    dualcliploadergguf_19 = dualcliploadergguf.load_clip(
                        clip_name1="clip_l.safetensors",
                        clip_name2="t5-v1_1-xxl-encoder-Q8_0.gguf",
                        type="flux",
                    )
                    emptylatentimage_5 = emptylatentimage.generate(
                        width=w, height=h, batch_size=batch
                    )
                    cliptextencodeflux_15 = cliptextencodeflux.encode(
                        clip_l=prompt,
                        t5xxl=prompt,
                        guidance=3.0,
                        clip=dualcliploadergguf_19[0],
                        flux_enabled=True,
                    )
                    # Flux-dev is guidance-distilled (cfg=1 below), so the
                    # negative conditioning is simply zeroed out.
                    conditioningzeroout_16 = conditioningzeroout.zero_out(
                        conditioning=cliptextencodeflux_15[0]
                    )
                    ksampler_3 = ksampler.sample(
                        seed=random.randint(1, 2**64),
                        steps=20,
                        cfg=1,
                        sampler_name="euler_cfgpp",
                        scheduler="beta",
                        denoise=1,
                        model=unetloadergguf_10[0],
                        positive=cliptextencodeflux_15[0],
                        negative=conditioningzeroout_16[0],
                        latent_image=emptylatentimage_5[0],
                        pipeline=True,
                        flux=True,
                    )
                    vaedecode_8 = vaedecode.decode(
                        samples=ksampler_3[0],
                        vae=vaeloader_11[0],
                        flux=True,
                    )
                    saveimage.save_images(
                        filename_prefix="LD-Flux",
                        images=hdr.apply_hdr2(vaedecode_8[0])
                        if autohdr
                        else vaedecode_8[0],
                    )
            else:
                # Text-to-image: do not proceed without a prompt.
                while prompt is None:
                    pass
                with torch.inference_mode():
                    try:
                        loraloader = LoRas.LoraLoader()
                        loraloader_274 = loraloader.load_lora(
                            lora_name="add_detail.safetensors",
                            strength_model=0.7,
                            strength_clip=0.7,
                            model=checkpointloadersimple_241[0],
                            clip=checkpointloadersimple_241[1],
                        )
                        print("loading add_detail.safetensors")
                    except Exception:
                        loraloader_274 = checkpointloadersimple_241
                    clipsetlastlayer = Clip.CLIPSetLastLayer()
                    clipsetlastlayer_257 = clipsetlastlayer.set_last_layer(
                        stop_at_clip_layer=-2, clip=loraloader_274[1]
                    )
                    cliptextencode_242 = cliptextencode.encode(
                        text=prompt,
                        clip=clipsetlastlayer_257[0],
                    )
                    cliptextencode_243 = cliptextencode.encode(
                        text="(worst quality, low quality:1.4), (zombie, sketch, interlocked fingers, comic), (embedding:EasyNegative), (embedding:badhandv4), (embedding:lr), (embedding:ng_deepnegative_v1_75t)",
                        clip=clipsetlastlayer_257[0],
                    )
                    emptylatentimage_244 = emptylatentimage.generate(
                        width=w, height=h, batch_size=batch
                    )
                    if stable_fast:
                        from modules.StableFast import StableFast

                        applystablefast = StableFast.ApplyStableFastUnet()
                        applystablefast_158 = applystablefast.apply_stable_fast(
                            enable_cuda_graph=False,
                            model=loraloader_274[0],
                        )
                    else:
                        applystablefast_158 = loraloader_274
                    # fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
                    # applystablefast_158 = fb_cache.patch(
                    #     applystablefast_158, "diffusion_model", 0.120
                    # )
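                    # First sampling pass, with the HiDiffusion MSW-MSA attention
                    # patch applied to the model.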
                    ksampler_239 = ksampler_instance.sample(
                        seed=seed,
                        steps=20,
                        cfg=7,
                        sampler_name=sampler_name,
                        scheduler="karras",
                        denoise=1,
                        pipeline=True,
                        model=hidiffoptimizer.go(
                            model_type="auto", model=applystablefast_158[0]
                        )[0],
                        positive=cliptextencode_242[0],
                        negative=cliptextencode_243[0],
                        latent_image=emptylatentimage_244[0],
                    )
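                    # High-resolution fix: 2x latent upscale followed by a second,
                    # low-denoise sampling pass.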
                    if hires_fix:
                        latentupscale_254 = latent_upscale.upscale(
                            width=w * 2,
                            height=h * 2,
                            samples=ksampler_239[0],
                        )
                        ksampler_253 = ksampler_instance.sample(
                            seed=random.randint(1, 2**64),
                            steps=10,
                            cfg=8,
                            sampler_name="euler_ancestral_cfgpp",
                            scheduler="normal",
                            denoise=0.45,
                            model=hidiffoptimizer.go(
                                model_type="auto", model=applystablefast_158[0]
                            )[0],
                            positive=cliptextencode_242[0],
                            negative=cliptextencode_243[0],
                            latent_image=latentupscale_254[0],
                            pipeline=True,
                        )
                    else:
                        ksampler_253 = ksampler_239
                    vaedecode_240 = vaedecode.decode(
                        samples=ksampler_253[0],
                        vae=checkpointloadersimple_241[2],
                    )
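                # ADetailer: two refinement passes, first over detected bodies,
                # then over detected faces.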
                if adetailer:
                    with torch.inference_mode():
                        samloader = SAM.SAMLoader()
                        samloader_87 = samloader.load_model(
                            model_name="sam_vit_b_01ec64.pth", device_mode="AUTO"
                        )
                        cliptextencode_124 = cliptextencode.encode(
                            text="royal, detailed, magnificent, beautiful, seducing",
                            clip=loraloader_274[1],
                        )
                        ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
                        ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
                            # model_name="face_yolov8m.pt"
                            model_name="person_yolov8m-seg.pt"
                        )
                        bboxdetectorsegs = bbox.BboxDetectorForEach()
                        samdetectorcombined = SAM.SAMDetectorCombined()
                        impactsegsandmask = SEGS.SegsBitwiseAndMask()
                        detailerforeachdebug = ADetailer.DetailerForEachTest()
                        bboxdetectorsegs_132 = bboxdetectorsegs.doit(
                            threshold=0.5,
                            dilation=10,
                            crop_factor=2,
                            drop_size=10,
                            labels="all",
                            bbox_detector=ultralyticsdetectorprovider_151[0],
                            image=vaedecode_240[0],
                        )
                        samdetectorcombined_139 = samdetectorcombined.doit(
                            detection_hint="center-1",
                            dilation=0,
                            threshold=0.93,
                            bbox_expansion=0,
                            mask_hint_threshold=0.7,
                            mask_hint_use_negative="False",
                            sam_model=samloader_87[0],
                            segs=bboxdetectorsegs_132,
                            image=vaedecode_240[0],
                        )
                        if samdetectorcombined_139 is None:
                            return
                        impactsegsandmask_152 = impactsegsandmask.doit(
                            segs=bboxdetectorsegs_132,
                            mask=samdetectorcombined_139[0],
                        )
                        detailerforeachdebug_145 = detailerforeachdebug.doit(
                            guide_size=512,
                            guide_size_for=False,
                            max_size=768,
                            seed=random.randint(1, 2**64),
                            steps=20,
                            cfg=6.5,
                            sampler_name=sampler_name,
                            scheduler="karras",
                            denoise=0.5,
                            feather=5,
                            noise_mask=True,
                            force_inpaint=True,
                            wildcard="",
                            cycle=1,
                            inpaint_model=False,
                            noise_mask_feather=20,
                            image=vaedecode_240[0],
                            segs=impactsegsandmask_152[0],
                            model=applystablefast_158[0],
                            clip=checkpointloadersimple_241[1],
                            vae=checkpointloadersimple_241[2],
                            positive=cliptextencode_124[0],
                            negative=cliptextencode_243[0],
                            pipeline=True,
                        )
                        saveimage.save_images(
                            filename_prefix="LD-body",
                            images=hdr.apply_hdr2(detailerforeachdebug_145[0])
                            if autohdr
                            else detailerforeachdebug_145[0],
                        )
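                        # Second pass: detect and refine faces on the body-refined image.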
                        ultralyticsdetectorprovider = bbox.UltralyticsDetectorProvider()
                        ultralyticsdetectorprovider_151 = ultralyticsdetectorprovider.doit(
                            model_name="face_yolov9c.pt"
                        )
                        bboxdetectorsegs_132 = bboxdetectorsegs.doit(
                            threshold=0.5,
                            dilation=10,
                            crop_factor=2,
                            drop_size=10,
                            labels="all",
                            bbox_detector=ultralyticsdetectorprovider_151[0],
                            image=detailerforeachdebug_145[0],
                        )
                        samdetectorcombined_139 = samdetectorcombined.doit(
                            detection_hint="center-1",
                            dilation=0,
                            threshold=0.93,
                            bbox_expansion=0,
                            mask_hint_threshold=0.7,
                            mask_hint_use_negative="False",
                            sam_model=samloader_87[0],
                            segs=bboxdetectorsegs_132,
                            image=detailerforeachdebug_145[0],
                        )
                        impactsegsandmask_152 = impactsegsandmask.doit(
                            segs=bboxdetectorsegs_132,
                            mask=samdetectorcombined_139[0],
                        )
                        detailerforeachdebug_145 = detailerforeachdebug.doit(
                            guide_size=512,
                            guide_size_for=False,
                            max_size=768,
                            seed=random.randint(1, 2**64),
                            steps=20,
                            cfg=6.5,
                            sampler_name=sampler_name,
                            scheduler="karras",
                            denoise=0.5,
                            feather=5,
                            noise_mask=True,
                            force_inpaint=True,
                            wildcard="",
                            cycle=1,
                            inpaint_model=False,
                            noise_mask_feather=20,
                            image=detailerforeachdebug_145[0],
                            segs=impactsegsandmask_152[0],
                            model=applystablefast_158[0],
                            clip=checkpointloadersimple_241[1],
                            vae=checkpointloadersimple_241[2],
                            positive=cliptextencode_124[0],
                            negative=cliptextencode_243[0],
                            pipeline=True,
                        )
                        saveimage.save_images(
                            filename_prefix="LD-head",
                            images=hdr.apply_hdr2(detailerforeachdebug_145[0])
                            if autohdr
                            else detailerforeachdebug_145[0],
                        )
                else:
                    saveimage.save_images(
                        filename_prefix="LD-HF" if hires_fix else "LD",
                        images=hdr.apply_hdr2(vaedecode_240[0])
                        if autohdr
                        else vaedecode_240[0],
                    )
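

# CLI entry point. Example invocation (the script filename below is assumed
# here for illustration):
#   python pipeline.py "a majestic castle on a hill" 1024 1024 1 1 --hires-fix --adetailer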
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the LightDiffusion pipeline.")
    parser.add_argument("prompt", type=str, help="The prompt for the pipeline.")
    parser.add_argument("width", type=int, help="The width of the generated image.")
    parser.add_argument("height", type=int, help="The height of the generated image.")
    parser.add_argument("number", type=int, help="The number of images to generate.")
    parser.add_argument(
        "batch",
        type=int,
        help="The batch size, i.e. the number of images to generate at once.",
    )
    parser.add_argument(
        "--hires-fix", action="store_true", help="Enable high-resolution fix."
    )
    parser.add_argument(
        "--adetailer",
        action="store_true",
        help="Enable automatic face and body enhancing.",
    )
    parser.add_argument(
        "--enhance-prompt",
        action="store_true",
        help="Enable Ollama prompt enhancement. Make sure to have Ollama installed.",
    )
    parser.add_argument(
        "--img2img",
        action="store_true",
        help="Enable image-to-image mode. This will use the prompt as the path to the input image.",
    )
    parser.add_argument(
        "--stable-fast",
        action="store_true",
        help="Enable StableFast mode. This will compile the model for faster inference.",
    )
    parser.add_argument(
        "--reuse-seed",
        action="store_true",
        help="Reuse the last used seed for sampling; when omitted, a new random seed is drawn on every run.",
    )
    parser.add_argument(
        "--flux",
        action="store_true",
        help="Enable the Flux mode.",
    )
    parser.add_argument(
        "--prio-speed",
        action="store_true",
        help="Prioritize speed over quality.",
    )
    parser.add_argument(
        "--autohdr",
        action="store_true",
        help="Enable the AutoHDR mode.",
    )
    parser.add_argument(
        "--realistic-model",
        action="store_true",
        help="Use the realistic model.",
    )
    args = parser.parse_args()
    pipeline(
        args.prompt,
        args.width,
        args.height,
        args.number,
        args.batch,
        args.hires_fix,
        args.adetailer,
        args.enhance_prompt,
        args.img2img,
        args.stable_fast,
        args.reuse_seed,
        args.flux,
        args.prio_speed,
        args.autohdr,
        args.realistic_model,
    )