File size: 6,827 Bytes
7cffda1
 
 
909eb62
7cffda1
909eb62
 
 
7cffda1
 
d9bbac9
7cffda1
 
 
 
 
 
1168b4e
7cffda1
 
 
909eb62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cffda1
 
 
 
909eb62
 
372ff8e
 
2be0274
909eb62
2be0274
7cffda1
2be0274
909eb62
 
 
2be0274
909eb62
 
 
2be0274
7cffda1
909eb62
 
7cffda1
 
 
 
 
 
2be0274
 
7cffda1
 
 
 
 
 
 
 
35d77df
 
1168b4e
7cffda1
 
a4d79f2
 
7cffda1
a4d79f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cffda1
d6d22de
a4d79f2
 
 
 
 
 
 
 
909eb62
 
 
 
 
 
 
 
a4d79f2
 
 
 
 
 
1168b4e
a4d79f2
 
d6d22de
7cffda1
a4d79f2
 
 
 
 
 
7cffda1
a4d79f2
 
 
 
 
 
 
7cffda1
a4d79f2
 
 
e961ca6
7cffda1
289a338
7cffda1
 
 
 
 
 
372ff8e
 
7cffda1
372ff8e
7cffda1
1168b4e
7cffda1
 
 
909eb62
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import gradio as gr
import numpy as np
import random
import os
import torch
from diffusers import StableDiffusionPipeline
from peft import PeftModel, LoraConfig
from diffusers import DiffusionPipeline

# Base checkpoint offered by default in the UI and used by the pipeline loader.
model_id_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"  # Replace with the model you would like to use

# Prefer the GPU whenever torch can see one.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision on the GPU, full precision on the CPU.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Upper bounds for the image-size and seed controls.
MAX_IMAGE_SIZE = 1024
MAX_SEED = np.iinfo(np.int32).max


def get_lora_sd_pipeline(
    ckpt_dir='./output',
    base_model_name_or_path=model_id_default,
    dtype=torch_dtype,
    device=device,
    adapter_name="default"
):
    """Build a Stable Diffusion pipeline with LoRA adapters attached.

    Args:
        ckpt_dir: Directory holding the adapter weights. The ``unet``
            sub-directory is always loaded; the ``text_encoder``
            sub-directory is loaded only when it exists.
        base_model_name_or_path: Base model handed to
            ``StableDiffusionPipeline.from_pretrained``. When ``None`` it is
            read from the text-encoder adapter config, if that directory
            exists.
        dtype: torch dtype used to load the base pipeline.
        device: Device the pipeline is moved to.
        adapter_name: Name under which the adapters are registered.

    Returns:
        The pipeline, with ``unet`` (and ``text_encoder`` when an adapter for
        it exists) wrapped in ``PeftModel``, moved to ``device``.

    Raises:
        ValueError: If no base model is given and none could be read from the
            adapter config.
    """
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    has_text_encoder_adapter = os.path.exists(text_encoder_sub_dir)

    # Fall back to the base model recorded in the adapter config.
    if has_text_encoder_adapter and base_model_name_or_path is None:
        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
        base_model_name_or_path = config.base_model_name_or_path

    if base_model_name_or_path is None:
        raise ValueError("Please specify the base model name or path")

    pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype).to(device)
    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)

    if has_text_encoder_adapter:
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )

    if dtype in (torch.float16, torch.bfloat16):
        # Cast to the requested reduced-precision dtype. `.half()` would force
        # float16 even when bfloat16 was asked for, leaving the UNet and text
        # encoder in a different dtype from the rest of the pipeline.
        pipe.unet.to(dtype)
        pipe.text_encoder.to(dtype)

    # Move again after wrapping so the adapter weights end up on `device` too.
    pipe.to(device)
    return pipe


def encode_prompt(prompt, tokenizer, text_encoder):
    """Encode ``prompt`` into text-encoder embeddings, supporting long prompts.

    The prompt is tokenized with padding up to ``tokenizer.model_max_length``
    and without truncation. If the result fits in a single window it is
    encoded directly. Otherwise the token sequence is split into overlapping
    windows of ``model_max_length`` tokens, advancing by
    ``int((8 / 11) * model_max_length)`` tokens each step; a short final
    window is padded with ``tokenizer.pad_token_id``. Each window is encoded
    and the per-window embeddings are averaged.

    Args:
        prompt: Text to encode.
        tokenizer: Callable tokenizer exposing ``model_max_length`` and
            ``pad_token_id`` and returning an object with ``input_ids``.
        text_encoder: Callable encoder exposing ``device``; the first element
            of its output is used as the embedding.

    Returns:
        The embedding tensor produced by ``text_encoder`` for a single window,
        or the element-wise mean over all windows for long prompts.
    """
    max_length = tokenizer.model_max_length
    text_inputs = tokenizer(
        prompt,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt",
    )
    input_ids = text_inputs.input_ids
    token_ids = input_ids[0]
    total = len(token_ids)

    with torch.no_grad():
        # Padding makes the sequence at least `max_length` long, so "fits in
        # one window" has to be `<=`; with `<` short prompts would be averaged
        # with an all-padding window.
        if total <= max_length:
            return text_encoder(input_ids.to(text_encoder.device))[0]

        stride = max(1, int((8 / 11) * max_length))
        embeds = []
        start = 0
        while True:
            window = token_ids[start:start + max_length]
            missing = max_length - len(window)
            if missing > 0:
                filler = torch.full(
                    (missing,),
                    tokenizer.pad_token_id,
                    dtype=window.dtype,
                    device=window.device,
                )
                window = torch.cat([window, filler])
            embeds.append(text_encoder(window.to(text_encoder.device).unsqueeze(0))[0])
            # Walk the real token sequence to its end so no trailing tokens
            # are dropped; stop as soon as a window reaches the last token.
            if start + max_length >= total:
                break
            start += stride
        return torch.mean(torch.stack(embeds, dim=0), dim=0)


# Build a LoRA pipeline once at import time with the default base model.
# NOTE(review): infer() constructs its own pipeline on every call and binds it
# to a local variable that is also named `pipe`, so this module-level instance
# is not the one used for generation — confirm whether it is still needed
# (e.g. to fetch the weights at startup).
pipe = get_lora_sd_pipeline(adapter_name="sticker_of_funny_cat_Pusheen")


# @spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    prompt,
    negative_prompt,
    width=512,
    height=512,
    model_id=model_id_default,
    seed=42,
    guidance_scale=7.0,
    lora_scale=0.5,
    num_inference_steps=20,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for ``prompt`` with the LoRA-adapted pipeline.

    A fresh pipeline is built for ``model_id`` on every call, both prompts
    are turned into embeddings with ``encode_prompt``, the LoRA weights are
    fused at ``lora_scale``, and the pipeline is run once.

    Args:
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        width: Output width in pixels.
        height: Output height in pixels.
        model_id: Base model name or path for the pipeline.
        seed: Seed for the random generator.
        guidance_scale: Guidance scale passed to the pipeline.
        lora_scale: Scale passed to ``pipe.fuse_lora``.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (tracks tqdm output).

    Returns:
        The first image produced by the pipeline.
    """
    # UI number/slider widgets can hand these over as floats, while
    # torch.Generator.manual_seed requires an int and sizes/steps are counts.
    seed = int(seed)
    width = int(width)
    height = int(height)
    num_inference_steps = int(num_inference_steps)

    generator = torch.Generator(device).manual_seed(seed)
    pipe = get_lora_sd_pipeline(
        base_model_name_or_path=model_id,
        adapter_name="sticker_of_funny_cat_Pusheen",
    )
    pipe = pipe.to(device)

    # Encode the prompts ourselves so prompts longer than one tokenizer
    # window are handled by encode_prompt instead of being cut off.
    prompt_embeds = encode_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
    negative_prompt_embeds = encode_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
    pipe.fuse_lora(lora_scale=lora_scale)

    image = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    ).images[0]

    return image

# Page-level CSS passed to gr.Blocks: horizontally centers the element with
# id "col-container" and caps its width at 640px.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

# Gradio UI: input controls, the result image, and the wiring to infer().
with gr.Blocks(css=css, fill_height=True) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Text-to-Image demo")

        with gr.Row():
            model_id = gr.Textbox(
                label="Model ID",
                max_lines=1,
                placeholder="Enter model id",
                value=model_id_default,
            )

        prompt = gr.Textbox(
            label="Prompt",
            max_lines=1,
            placeholder="Enter your prompt",
        )

        negative_prompt = gr.Textbox(
            label="Negative prompt",
            max_lines=1,
            placeholder="Enter your negative prompt",
        )

        with gr.Row():
            seed = gr.Number(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
                precision=0,  # deliver the seed as an int
            )

            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.0,  # Replace with defaults that work for your model
            )

            lora_scale = gr.Slider(
                label="LoRA scale",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.5,
            )

            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=20,  # Replace with defaults that work for your model
            )

        with gr.Accordion("Optional Settings", open=False):

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # same default as infer(); replace with what works for your model
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # same default as infer(); replace with what works for your model
                )

        run_button = gr.Button("Run", scale=0, variant="primary")
        result = gr.Image(label="Result", show_label=False)

    gr.on(
        triggers=[run_button.click],
        fn=infer,
        # Inputs are passed positionally, so this list must follow infer()'s
        # parameter order exactly. Leaving out lora_scale would shift the
        # step count into the lora_scale argument.
        inputs=[
            prompt,
            negative_prompt,
            width,
            height,
            model_id,
            seed,
            guidance_scale,
            lora_scale,
            num_inference_steps,
        ],
        outputs=[result],
    )

# Launch the demo only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()