#!/usr/bin/env python3
"""Run the SDXL base pipeline, then pass its output through the SDXL refiner.

The script builds a batch of identical prompts, generates images (or latents,
when ``do_latent`` is true) with the base pipeline, feeds that output to the
refiner pipeline once per entry in ``test_strengths``, and writes every
refined image to ``OUTPUT_DIR`` as a PNG.

Requires a CUDA device: both pipelines and all generators are placed on
"cuda".
"""
from pathlib import Path

import torch
from diffusers import AutoencoderKL, DiffusionPipeline

# Backend configuration, applied before any model is loaded.
# NOTE(review): cudnn determinism is switched off and benchmark mode is on
# while fixed seeds are used below -- confirm this combination is intended.
torch.backends.cudnn.deterministic = False
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
torch.backends.cudnn.benchmark = True
torch.backends.cuda.enable_flash_sdp(False)

# Directory that receives the refined images; created up front so a missing
# directory fails (or is fixed) before any GPU work is done.
OUTPUT_DIR = Path("/home/patrick/images/refiner_bug")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# The same VAE instance is handed to the base pipeline and, through
# base_pipe.vae, to the refiner pipeline further down.
vae = AutoencoderKL.from_pretrained(
    "stabilityai/sdxl-vae",
    torch_dtype=torch.float16,
)
base_pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
base_pipe.to("cuda")
# Alternatives to .to("cuda"), left disabled:
# base_pipe.enable_sequential_cpu_offload()
# base_pipe.enable_model_cpu_offload()
# If using torch < 2.0:
# base_pipe.enable_xformers_memory_efficient_attention()

# Fixed seeds for the base and refiner generators.
torch_seed = 4202420420
refiner_seed = 698008569

prompt = "happy child flying a kite on a sunny day"
negative_prompt = ''

# Batch size, and whether the base pipeline hands latents (True) or PIL
# images (False) to the refiner.
batch_size = 2
do_latent = True

# Repeat the single prompt / negative prompt once per batch element.
prompt = [prompt] * batch_size
negative_prompt = [negative_prompt] * batch_size

# Both pipelines are called with this value as num_inference_steps.
total_num_steps = 20

# NOTE: list multiplication repeats ONE Generator object batch_size times, so
# every entry shares the same RNG state; these are not independent
# generators. Kept as-is so the script's output does not change.
generator = [torch.Generator(device="cuda").manual_seed(torch_seed)] * batch_size

# The refiner reuses the base pipeline's VAE and second text encoder instead
# of loading its own copies.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    vae=base_pipe.vae,
    text_encoder_2=base_pipe.text_encoder_2,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
# Using channels last layout (disabled):
# pipe.unet.to(memory_format=torch.channels_last)
pipe.to("cuda")
# Alternative to .to("cuda"), left disabled:
# pipe.enable_sequential_cpu_offload()

# Generate the base output: latents when do_latent is set, PIL images
# otherwise.
pre_image = base_pipe(
    prompt=prompt,
    generator=generator,
    num_inference_steps=total_num_steps,
    negative_prompt=negative_prompt,
    output_type="latent" if do_latent else "pil",
).images

# Refiner strengths to test. Only 0.2 is exercised at the moment; add more
# values (e.g. 0.1 through 0.9) to sweep a range.
test_strengths = [0.2]

for refiner_strength in test_strengths:
    # Re-seed for every strength so each refiner run starts from the same RNG
    # state. As above, the list holds one shared Generator object.
    generator_two = [
        torch.Generator(device="cuda").manual_seed(refiner_seed)
    ] * batch_size

    # Put the base output through the refiner. The strength parameter is
    # used here; denoising_start is not passed.
    images = pipe(
        prompt=prompt,
        image=pre_image,
        aesthetic_score=10,
        negative_aesthetic_score=2.4,
        generator=generator_two,
        num_inference_steps=total_num_steps,
        strength=refiner_strength,
        negative_prompt=negative_prompt,
    ).images

    # One PNG per batch element; the file name records the strength, the
    # batch index, the batch size and whether latents were handed over.
    for idx, image in enumerate(images):
        print(f'Image: {idx}')
        image.save(
            OUTPUT_DIR / f'test-{refiner_strength}-{idx}--{batch_size}--{do_latent}.png',
            format='PNG',
        )