Spaces:

ktrndy
/

diffusion-image-gen

Sleeping

File size: 4,960 Bytes

7cffda1
 
 
909eb62
7cffda1
909eb62
 
7cffda1
 
ce1e24f
7cffda1
033c787
 
 
 
7cffda1
1168b4e
7cffda1
 
 
 
 
 
 
909eb62
 
372ff8e
 
2be0274
6b00928
8b2fd67
7cffda1
2be0274
909eb62
17091ee
f308f4a
17091ee
 
 
 
 
 
 
5ac29d1
17091ee
 
 
 
645b875
 
329b2a2
30f9970
17091ee
 
 
 
2be0274
7cffda1
2ca97fb
 
7cffda1
 
 
 
 
 
2be0274
 
7cffda1
 
 
 
 
 
 
 
35d77df
 
1168b4e
7cffda1
 
a4d79f2
 
7cffda1
a4d79f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cffda1
d6d22de
a4d79f2
 
 
 
 
 
 
55b7ae0
39e252c
 
 
 
 
 
909eb62
 
a4d79f2
 
 
 
 
 
1168b4e
a4d79f2
 
d6d22de
7cffda1
a4d79f2
 
 
 
 
8b2fd67
7cffda1
a4d79f2
 
 
 
 
 
8b2fd67
7cffda1
a4d79f2
 
 
e961ca6
7cffda1
289a338
7cffda1
 
 
 
 
 
372ff8e
 
209acb7
 
 
099d3e3
7cffda1
1168b4e
7cffda1
 
 
909eb62

import gradio as gr
import numpy as np
import random
import os
import torch
from diffusers import StableDiffusionPipeline
from peft import PeftModel, LoraConfig

device = "cuda" if torch.cuda.is_available() else "cpu"
model_id_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"

if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024


# @spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    prompt,
    negative_prompt,
    width=512,
    height=512,
    model_id=model_id_default,
    seed=42,
    guidance_scale=7.0,
    lora_scale=1.0,
    num_inference_steps=20,
    progress=gr.Progress(track_tqdm=True),
):  
    generator = torch.Generator(device).manual_seed(seed)

    ckpt_dir='./model_output'
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")

    if model_id is None:
        raise ValueError("Please specify the base model name or path")

    pipe = StableDiffusionPipeline.from_pretrained(model_id, 
                                                   torch_dtype=torch_dtype, 
                                                   safety_checker=None).to(device)
    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir)
    pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir)

    pipe.unet.load_state_dict({k: lora_scale*v for k, v in pipe.unet.state_dict().items()})
    pipe.text_encoder.load_state_dict({k: lora_scale*v for k, v in pipe.text_encoder.state_dict().items()})
    
    if torch_dtype in (torch.float16, torch.bfloat16):
        pipe.unet.half()
        pipe.text_encoder.half()

    pipe.to(device)
    
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    ).images[0]
    
    return image

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css, fill_height=True) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Text-to-Image demo")

        with gr.Row():
            model_id = gr.Textbox(
                label="Model ID",
                max_lines=1,
                placeholder="Enter model id",
                value=model_id_default,
            )

        prompt = gr.Textbox(
            label="Prompt",
            max_lines=1,
            placeholder="Enter your prompt",
        )
        
        negative_prompt = gr.Textbox(
            label="Negative prompt",
            max_lines=1,
            placeholder="Enter your negative prompt",
        )
        
        with gr.Row():
            seed = gr.Number(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )
            
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.0,  # Replace with defaults that work for your model
            )
        with gr.Row():
            lora_scale = gr.Slider(
                label="LoRA scale",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=1.0,
            )

            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=20,  # Replace with defaults that work for your model
            )

        with gr.Accordion("Optional Settings", open=False):
            
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )
        
        run_button = gr.Button("Run", scale=0, variant="primary")
        result = gr.Image(label="Result", show_label=False)
            
    gr.on(
        triggers=[run_button.click],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            width,
            height,
            model_id,
            seed,
            guidance_scale,      
            lora_scale,
            num_inference_steps
            
        ],
        outputs=[result],
    )

if __name__ == "__main__":
    demo.launch()