#!/usr/bin/env python3
"""Generate SDXL images with Compel prompt weighting (base -> refiner) and upload them.

Runs the SDXL base pipeline to latents with per-token-weighted prompt embeddings,
refines the latents with the SDXL refiner, and uploads the result three times with
different seeds each pass.
"""
import hf_image_uploader as hiu
import torch
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DiffusionPipeline

# Base SDXL pipeline (text -> latents) in fp16.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    torch_dtype=torch.float16,
)
# Refiner pipeline (latents -> final image) in fp16.
pipe2 = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    variant="fp16",
    torch_dtype=torch.float16,
)
pipe.to("cuda")
pipe2.to("cuda")
# pipe.enable_model_cpu_offload()
# pipe2.enable_model_cpu_offload()

# Compel for the base pipeline: SDXL uses two text encoders; only the second
# one contributes pooled embeddings (requires_pooled=[False, True]).
compel = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)
# Compel for the refiner: it only has the second tokenizer/encoder.
compel2 = Compel(
    tokenizer=pipe.tokenizer_2,
    text_encoder=pipe.text_encoder_2,
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=True,
)

# apply weights — "(ball)1.5" up-weights the token, "(ball)0.6" down-weights it
prompt = ["a red cat playing with a (ball)1.5", "a red cat playing with a (ball)0.6"]
conditioning, pooled = compel(prompt)
conditioning2, pooled2 = compel2(prompt)

# generate image
for run in range(3):
    # BUGFIX: the original re-seeded with the same seeds (0..len(prompt)-1) on
    # every pass, so all three iterations produced identical images and the
    # same picture was uploaded three times.  Offset the seeds by the pass
    # index so each pass is distinct; pass 0 keeps the original seeds.
    generator = [
        torch.Generator().manual_seed(run * len(prompt) + i)
        for i in range(len(prompt))
    ]
    image = pipe(
        prompt_embeds=conditioning,
        pooled_prompt_embeds=pooled,
        generator=generator,
        num_inference_steps=30,
        output_type="latent",
    ).images
    # NOTE(review): `.images[0]` keeps only the first prompt's refined image;
    # the second weighted prompt's output is discarded — confirm intentional.
    image = pipe2(
        image=image,
        prompt_embeds=conditioning2,
        pooled_prompt_embeds=pooled2,
        generator=generator,
        num_inference_steps=20,
    ).images[0]
    hiu.upload(image, "patrickvonplaten/images")