#!/usr/bin/env python3
from diffusers import DiffusionPipeline, AutoencoderKL
import torch
# Global backend switches, applied once before any model is loaded.
# TF32 is turned off for both CUDA matmul and cuDNN.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
# cuDNN: benchmark mode on, deterministic mode off.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
# The flash scaled-dot-product attention backend is disabled.
torch.backends.cuda.enable_flash_sdp(False)

# Load the standalone SDXL VAE in fp16; it is handed to the base pipeline
# below instead of the VAE bundled with the base checkpoint.
vae = AutoencoderKL.from_pretrained(
    "stabilityai/sdxl-vae",
    torch_dtype=torch.float16,
)
base_pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
# Alternatives to moving the whole pipeline to the GPU:
#   base_pipe.enable_sequential_cpu_offload()
#   base_pipe.enable_model_cpu_offload()
base_pipe.to("cuda")

# If using torch < 2.0:
# base_pipe.enable_xformers_memory_efficient_attention()

# Reproducibility: fixed seeds for the base stage and the refiner stage.
torch_seed = 4202420420
refiner_seed = 698008569
prompt = "happy child flying a kite on a sunny day"
negative_prompt = ''
# Batch size.
batch_size = 2
# When True the base pipeline is asked for latents rather than PIL images.
do_latent = True
# The pipelines take one prompt / negative prompt per image in the batch.
prompt = [prompt] * batch_size
negative_prompt = [negative_prompt] * batch_size
# Number of inference steps requested from each pipeline call.
total_num_steps = 20
# We need multiple Generators: one *distinct* object per image.
# `[gen] * batch_size` would repeat a single generator, so every image would
# draw from the same advancing RNG state and its noise would depend on its
# position in the batch and on batch_size. Seeding each generator separately
# (image 0 keeps torch_seed) makes every image reproducible on its own.
generator = [
    torch.Generator(device="cuda").manual_seed(torch_seed + offset)
    for offset in range(batch_size)
]

# Build the refiner pipeline, sharing the base pipeline's VAE and second
# text encoder instead of loading separate copies.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    vae=base_pipe.vae,
    text_encoder_2=base_pipe.text_encoder_2,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
# Optional: channels-last memory layout for the UNet.
# pipe.unet.to(memory_format=torch.channels_last)
# Alternative to moving the whole pipeline to the GPU:
#   pipe.enable_sequential_cpu_offload()
pipe.to("cuda")

# Run the base pipeline over the whole batch. The result is kept as latents
# when do_latent is set, otherwise as PIL images, and is fed to the refiner.
pre_image = base_pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=total_num_steps,
    generator=generator,
    output_type="latent" if do_latent else "pil",
).images

# Refiner strengths to test. Only 0.2 is exercised right now; add more values
# (e.g. 0.1 through 0.9) to sweep a range.
test_strengths = [0.2]
for refiner_strength in test_strengths:
    # Build a fresh set of generators for every strength so each run starts
    # from the same RNG state. Each image gets its own generator object:
    # `[gen] * batch_size` would alias a single generator and make an image's
    # noise depend on its batch position (image 0 keeps refiner_seed).
    generator_two = [
        torch.Generator(device="cuda").manual_seed(refiner_seed + offset)
        for offset in range(batch_size)
    ]
    # Put through the refiner now.
    # NOTE: denoising_start is not passed; the amount of refinement is
    # controlled through `strength` only.
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=pre_image,
        strength=refiner_strength,
        num_inference_steps=total_num_steps,
        aesthetic_score=10,
        negative_aesthetic_score=2.4,
        generator=generator_two,
    ).images
    # Save every refined image; the file name records strength, index,
    # batch size and whether latents were passed between the stages.
    for idx, image in enumerate(images):
        print(f'Image: {idx}')
        image.save(f'/home/patrick/images/refiner_bug/test-{refiner_strength}-{idx}--{batch_size}--{do_latent}.png', format='PNG')