# Spaces: Sleeping
def latents_to_rgb(latents):
    """Convert one latent tensor into a rough RGB preview image.

    Each of the three output channels is a fixed linear combination of the
    four latent channels plus a per-channel bias. Nothing is decoded through
    a model here, so the result is only an approximation, and it has the same
    height and width as the latent itself.

    Args:
        latents: Tensor of shape ``(4, H, W)``. The einsum contracts the
            first of the last three axes against four weights, and the final
            ``transpose(1, 2, 0)`` requires exactly three axes, so a batch
            axis must be indexed away by the caller.

    Returns:
        A ``PIL.Image.Image`` built from an ``(H, W, 3)`` uint8 array.
    """
    # One row per output channel, one column per latent channel.
    weights = (
        (60, -60, 25, -70),
        (60, -5, 15, -50),
        (60, 10, -5, -35),
    )
    biases = (150, 140, 130)

    # Create the constants with the latents' dtype and device so the einsum
    # and the addition below never mix dtypes or devices.
    weights_tensor = torch.tensor(
        weights, dtype=latents.dtype, device=latents.device
    ).t()  # (3, 4) -> (4, 3): latent channel "l" by output channel "r"
    biases_tensor = torch.tensor(
        biases, dtype=latents.dtype, device=latents.device
    )

    rgb_tensor = torch.einsum("...lxy,lr -> ...rxy", latents, weights_tensor)
    # Reshape the biases to (3, 1, 1) so they broadcast over height and width.
    rgb_tensor = rgb_tensor + biases_tensor.unsqueeze(-1).unsqueeze(-1)

    # Clamp to the 8-bit range, cast to uint8, then go channels-first ->
    # channels-last, which is the layout Image.fromarray expects.
    image_array = rgb_tensor.clamp(0, 255).byte().cpu().numpy().transpose(1, 2, 0)
    return Image.fromarray(image_array)
def decode_tensors(pipe, step, timestep, callback_kwargs):
    """Step-end callback that saves a preview image of the current latents.

    Args:
        pipe: The pipeline invoking the callback (unused here).
        step: Index of the step that just finished; used as the file name.
        timestep: Scheduler timestep for this step (unused here).
        callback_kwargs: Mapping that must contain a ``"latents"`` entry.

    Returns:
        ``callback_kwargs`` unchanged, handed back to the caller.
    """
    latents = callback_kwargs["latents"]

    # Only the first item along the leading axis is previewed.
    image = latents_to_rgb(latents[0])
    # Relative path: the file lands in the current working directory and an
    # existing file with the same step number is overwritten.
    image.save(f"{step}.png")

    return callback_kwargs
# NOTE: these imports sit below the function definitions above. That works
# because those functions only look up `torch` and `Image` when called, which
# happens during the pipeline run further down.
import torch
from diffusers import AutoPipelineForText2Image
from PIL import Image

# Load the SDXL base checkpoint in half precision and move it to the GPU.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to("cuda")

# Run generation with decode_tensors registered as the step-end callback.
# "latents" is requested in callback_on_step_end_tensor_inputs because
# decode_tensors reads callback_kwargs["latents"].
image = pipeline(
    prompt="A croissant shaped like a cute bear.",
    negative_prompt="Deformed, ugly, bad anatomy",
    callback_on_step_end=decode_tensors,
    callback_on_step_end_tensor_inputs=["latents"],
).images[0]