dbaranchuk committed
Commit 88adfd9 · 1 Parent(s): fac3c6f

Update space

Files changed (1):
  1. app.py  +45 -192

app.py CHANGED
@@ -5,131 +5,32 @@ import random
 import torch
 from diffusers import DDPMScheduler, StableDiffusionPipeline, DDIMScheduler, UNet2DConditionModel
 import p2p, generation, inversion
 
-model_id = 'runwayml/stable-diffusion-v1-5'
-dtype = torch.float16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Reverse
-# -----------------------------
-pipe_reverse = StableDiffusionPipeline.from_pretrained(
-    model_id,
-    scheduler=DDIMScheduler.from_pretrained(model_id, subfolder="scheduler"),
-).to(device=device, dtype=dtype)
-unet = UNet2DConditionModel.from_pretrained("dbaranchuk/sd15-cfg-distill-unet").to(device)
-pipe_reverse.unet = unet
-pipe_reverse.load_lora_weights("dbaranchuk/icd-lora-sd15",
-                               weight_name='reverse-259-519-779-999.safetensors')
-pipe_reverse.fuse_lora()
-pipe_reverse.to(device)
-# -----------------------------
-
-# Forward
-# -----------------------------
-pipe_forward = StableDiffusionPipeline.from_pretrained(
-    model_id,
-    scheduler=DDIMScheduler.from_pretrained(model_id, subfolder="scheduler"),
-).to(device=device, dtype=dtype)
-unet = UNet2DConditionModel.from_pretrained("dbaranchuk/sd15-cfg-distill-unet").to(device)
-pipe_forward.unet = unet
-pipe_forward.load_lora_weights("dbaranchuk/icd-lora-sd15",
-                               weight_name='forward-19-259-519-779.safetensors')
-pipe_forward.fuse_lora()
-pipe_forward.to(device)
-# -----------------------------
+from diffusers import StableDiffusionInstructPix2PixPipeline, LCMScheduler
+
+# InstructPix2Pix with the LCM scheduler
+pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
+    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
+)
+pipe = pipe.to("cuda")
+pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+
+# Adapt the InstructPix2Pix model using the LoRA parameters
+adapter_id = "latent-consistency/lcm-lora-sdv1-5"
+pipe.load_lora_weights(adapter_id)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
 @spaces.GPU(duration=30)
-def infer(image_path, input_prompt, edited_prompt, guidance, tau,
-          crs, srs, amplify_factor, amplify_word,
-          blend_orig, blend_edited, is_replacement):
-
-    tokenizer = pipe_forward.tokenizer
-    noise_scheduler = DDPMScheduler.from_pretrained(
-        "runwayml/stable-diffusion-v1-5", subfolder="scheduler")
-
-    NUM_REVERSE_CONS_STEPS = 4
-    REVERSE_TIMESTEPS = [259, 519, 779, 999]
-    NUM_FORWARD_CONS_STEPS = 4
-    FORWARD_TIMESTEPS = [19, 259, 519, 779]
-    NUM_DDIM_STEPS = 50
-
-    solver = generation.Generator(
-        model=pipe_forward,
-        noise_scheduler=noise_scheduler,
-        n_steps=NUM_DDIM_STEPS,
-        forward_cons_model=pipe_forward,
-        forward_timesteps=FORWARD_TIMESTEPS,
-        reverse_cons_model=pipe_reverse,
-        reverse_timesteps=REVERSE_TIMESTEPS,
-        num_endpoints=NUM_REVERSE_CONS_STEPS,
-        num_forward_endpoints=NUM_FORWARD_CONS_STEPS,
-        max_forward_timestep_index=49,
-        start_timestep=19)
-
-    p2p.NUM_DDIM_STEPS = NUM_DDIM_STEPS
-    p2p.tokenizer = tokenizer
-    p2p.device = 'cuda'
-
-    prompt = [input_prompt]
-
-    (image_gt, image_rec), ddim_latent, uncond_embeddings = inversion.invert(
-        # Playing params
-        image_path=image_path,
-        prompt=prompt,
-        # Fixed params
-        is_cons_inversion=True,
-        w_embed_dim=512,
-        inv_guidance_scale=0.0,
-        stop_step=50,
-        solver=solver,
-        seed=10500)
-
-    p2p.NUM_DDIM_STEPS = 4
-    p2p.tokenizer = tokenizer
-    p2p.device = 'cuda'
-
-    prompts = [input_prompt,
-               edited_prompt]
-
-    # Playing params
-    cross_replace_steps = {'default_': crs, }
-    self_replace_steps = srs
-    blend_word = (((blend_orig,), (blend_edited,)))
-    eq_params = {"words": (amplify_word,), "values": (amplify_factor,)}
-
-    controller = p2p.make_controller(prompts,
-                                     is_replacement,  # (is_replacement) True if only one word is changed
-                                     cross_replace_steps,
-                                     self_replace_steps,
-                                     blend_word,
-                                     eq_params)
-
-    tau = tau
-    image, _ = generation.runner(
-        # Playing params
-        guidance_scale=guidance - 1,
-        tau1=tau,  # Dynamic guidance if tau < 1.0
-        tau2=tau,
-        # Fixed params
-        model=pipe_reverse,
-        is_cons_forward=True,
-        w_embed_dim=512,
-        solver=solver,
-        prompt=prompts,
-        controller=controller,
-        num_inference_steps=50,
-        generator=None,
-        latent=ddim_latent,
-        uncond_embeddings=uncond_embeddings,
-        return_type='image')
-
-    image = generation.to_pil_images(image[1, :, :, :])
+def infer(image, edit_instruction, guidance_scale, image_guidance_scale, n_steps):
+    image = pipe(prompt=edit_instruction,
+                 image=image,
+                 num_inference_steps=n_steps,
+                 guidance_scale=guidance_scale,
+                 image_guidance_scale=image_guidance_scale,
+                 ).images[0]
     return image
 
 css="""
@@ -176,14 +77,8 @@ with gr.Blocks(css=css) as demo:
         )
         with gr.Row():
 
-            input_prompt = gr.Text(
-                label="Origial prompt",
-                max_lines=1,
-                placeholder="Enter your prompt",
-            )
-
-            prompt = gr.Text(
-                label="Edited prompt",
+            edit_instruction = gr.Text(
+                label="Edit instruction",
                 max_lines=1,
                 placeholder="Enter your prompt",
             )
@@ -192,7 +87,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
 
             with gr.Column():
-                input_image = gr.Image(label="Input image", height=512, width=512, show_label=False)
+                image = gr.Image(label="Input image", height=512, width=512, show_label=False)
             with gr.Column():
                 result = gr.Image(label="Result", height=512, width=512, show_label=False)
 
@@ -201,68 +96,28 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
 
             guidance_scale = gr.Slider(
-                label="Guidance scale",
+                label="guidance scale",
                 minimum=1.0,
-                maximum=20.0,
+                maximum=8.0,
                 step=1.0,
-                value=20.0,
+                value=2.0,
             )
 
-            tau = gr.Slider(
-                label="Dynamic guidance tau",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.2,
-                value=0.8,
-            )
-
-        with gr.Row():
-
-            crs = gr.Slider(
-                label="Cross replace steps",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=0.4
-            )
-
-            srs = gr.Slider(
-                label="Self replace steps",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=0.4,
+            image_guidance_scale = gr.Slider(
+                label="image guidance scale",
+                minimum=1.0,
+                maximum=8.0,
+                step=1.0,
+                value=1.0,
             )
 
-        with gr.Row():
-            amplify_word = gr.Text(
-                label="Amplify word",
-                max_lines=1,
-                placeholder="Enter your word",
-            )
-
-            amplify_factor = gr.Slider(
-                label="Amplify factor",
-                minimum=0.0,
-                maximum=30,
+            n_steps = gr.Slider(
+                label="inference steps",
+                minimum=1.0,
+                maximum=10.0,
                 step=1.0,
-                value=1,
+                value=4.0,
             )
-        with gr.Row():
-
-            blend_orig = gr.Text(
-                label="Blended word 1",
-                max_lines=1,
-                placeholder="Enter your word",)
-
-            blend_edited = gr.Text(
-                label="Blended word 2",
-                max_lines=1,
-                placeholder="Enter your word",)
-
-        with gr.Row():
-
-            is_replacement = gr.Checkbox(label="Is replacement?", value=False)
 
         with gr.Row():
             run_button = gr.Button("Edit", scale=0)
@@ -397,22 +252,20 @@ with gr.Blocks(css=css) as demo:
        ],
    ]
 
-    gr.Examples(
-        examples = examples,
-        inputs =[input_image, input_prompt, prompt,
-                 guidance_scale, tau, crs, srs, amplify_factor, amplify_word,
-                 blend_orig, blend_edited, is_replacement],
-        outputs=[
-            result
-        ],
-        fn=infer, cache_examples=True
-    )
+    #gr.Examples(
+    #    examples = examples,
+    #    inputs =[input_image, input_prompt, prompt,
+    #             guidance_scale, tau, crs, srs, amplify_factor, amplify_word,
+    #             blend_orig, blend_edited, is_replacement],
+    #    outputs=[
+    #        result
+    #    ],
+    #    fn=infer, cache_examples=True
+    #)
 
    run_button.click(
        fn = infer,
-        inputs=[input_image, input_prompt, prompt,
-                guidance_scale, tau, crs, srs, amplify_factor, amplify_word,
-                blend_orig, blend_edited, is_replacement],
+        inputs=[image, edit_instruction, guidance_scale, image_guidance_scale, n_steps],
        outputs = [result]
    )
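
For context, the updated app.py replaces the previous consistency-distillation editing stack with a plain InstructPix2Pix pipeline accelerated by LCM-LoRA. Below is a minimal standalone sketch of that setup outside Gradio; the input image URL, the edit instruction, and the output filename are placeholders for illustration, not part of this commit.

import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, LCMScheduler
from diffusers.utils import load_image

# InstructPix2Pix base weights with the LCM scheduler, as in the updated app.py
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# The LCM-LoRA adapter enables few-step sampling at low guidance scales
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

# Placeholder inputs for illustration only
image = load_image("https://example.com/input.png").resize((512, 512))
edited = pipe(
    prompt="make it a watercolor painting",  # edit instruction
    image=image,
    num_inference_steps=4,      # demo default for "inference steps"
    guidance_scale=2.0,         # demo default for "guidance scale"
    image_guidance_scale=1.0,   # demo default for "image guidance scale"
).images[0]
edited.save("edited.png")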