from diffusers import StableDiffusionXLPipeline, DDIMScheduler
import torch
import gradio as gr
import inversion
import numpy as np
from PIL import Image
import sa_handler
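# `inversion` and `sa_handler` are assumed to be the helper modules from the
# StyleAligned reference implementation (https://github.com/google/style-aligned),
# providing DDIM inversion and the shared-attention handler used below.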
device = "cuda" if torch.cuda.is_available() else "cpu"
scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
pipeline = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True, scheduler=scheduler).to(device)
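# run() ties the pieces together: it DDIM-inverts the reference image to recover
# its latent trajectory, registers shared attention across the batch via the
# StyleAligned handler, and then denoises all prompts jointly so the generated
# images inherit the style of the reference.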
def run(image, src_style, src_prompt, prompts, shared_score_shift, shared_score_scale, guidance_scale, num_inference_steps, large, seed):
    prompts = prompts.splitlines()
    dim, d = (1024, 128) if large else (512, 64)
    image = image.resize((dim, dim))
    x0 = np.array(image)
    # DDIM-invert the reference image so its latent trajectory can be replayed during generation.
    zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)
    prompts.insert(0, src_prompt)
    shared_score_shift = np.log(shared_score_shift)
    handler = sa_handler.Handler(pipeline)
    sa_args = sa_handler.StyleAlignedArgs(
        share_group_norm=True, share_layer_norm=True, share_attention=True,
        adain_queries=True, adain_keys=True, adain_values=False,
        shared_score_shift=shared_score_shift, shared_score_scale=shared_score_scale,
    )
    handler.register(sa_args)
    # Append the style description to every target prompt (index 0 is the reference prompt).
    for i in range(1, len(prompts)):
        prompts[i] = f'{prompts[i]}, {src_style}.'
    zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)
    g_cpu = torch.Generator(device='cpu')
    if seed > 0:
        g_cpu.manual_seed(seed)
    latents = torch.randn(len(prompts), 4, d, d, device='cpu', generator=g_cpu, dtype=pipeline.unet.dtype).to(device)
    # The first latent is the inverted reference; the rest are fresh noise that shares its style.
    latents[0] = zT
    images = pipeline(prompts, latents=latents, callback_on_step_end=inversion_callback, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).images
    handler.remove()
    torch.cuda.empty_cache()
    # The pipeline returns PIL images by default, so no array-to-image conversion is needed.
    return images
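# Gradio UI: the inputs below mirror the parameters of run(); results appear in a gallery.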
with gr.Blocks() as demo:
    gr.Markdown('''# Welcome to Tonic's Stable Style Align
Here you can generate images that share the style of a reference image, using [SDXL](https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl). Add a reference picture, describe its style, and add prompts to generate images in that style. It's most interesting with your own art!''')
    with gr.Row():
        image_input = gr.Image(label="Reference image", type="pil")
    with gr.Row():
        style_input = gr.Textbox(label="Describe the reference style")
        image_desc_input = gr.Textbox(label="Describe the reference image")
        prompts_input = gr.Textbox(label="Prompts to generate images (separate with new lines)", lines=5)
    with gr.Accordion(label="Advanced Settings"):
        with gr.Row():
            shared_score_shift_input = gr.Slider(value=1.1, label="shared_score_shift", minimum=1.0, maximum=2.0, step=0.05)
            shared_score_scale_input = gr.Slider(value=1.0, label="shared_score_scale", minimum=0.0, maximum=1.0, step=0.05)
            guidance_scale_input = gr.Slider(value=10.0, label="guidance_scale", minimum=5.0, maximum=20.0, step=1)
            num_inference_steps_input = gr.Slider(value=12, label="num_inference_steps", minimum=1, maximum=12, step=1)
            large_input = gr.Checkbox(False, label="Large (1024x1024)")
            seed_input = gr.Slider(value=0, label="seed (0 for random)", minimum=0, maximum=1000000, step=1)
    with gr.Row():
        run_button = gr.Button("Generate Images")
    with gr.Row():
        output_gallery = gr.Gallery()
    run_button.click(
        run,
        inputs=[image_input, style_input, image_desc_input, prompts_input, shared_score_shift_input, shared_score_scale_input, guidance_scale_input, num_inference_steps_input, large_input, seed_input],
        outputs=output_gallery,
    )

demo.launch()
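# A minimal programmatic sketch of the same flow, bypassing the UI. The file
# name "reference.png" and the prompts here are placeholder examples, not part
# of the app:
#
#   from PIL import Image
#   ref = Image.open("reference.png")
#   results = run(ref, "watercolor painting", "a house by a lake",
#                 "a cat\na dog", shared_score_shift=1.1, shared_score_scale=1.0,
#                 guidance_scale=10.0, num_inference_steps=12, large=False, seed=0)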