Barak1 committed on
Commit
e3bc468
·
1 Parent(s): 7219157

1. Save in state only inversion results

Browse files
Files changed (2) hide show
  1. app.py +63 -23
  2. src/editor.py +47 -50
app.py CHANGED
@@ -35,8 +35,9 @@ if device == "cuda":
35
 
36
  scheduler_class = MyEulerAncestralDiscreteScheduler
37
 
38
- pipe_inversion = SDXLDDIMPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True) # .to('cpu')
39
- pipe_inference = AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True) # .to('cpu')
 
40
  pipe_inference.scheduler = scheduler_class.from_config(pipe_inference.scheduler.config)
41
  pipe_inversion.scheduler = scheduler_class.from_config(pipe_inversion.scheduler.config)
42
  pipe_inversion.scheduler_inference = scheduler_class.from_config(pipe_inference.scheduler.config)
@@ -57,31 +58,27 @@ with gr.Blocks(css="style.css") as demo:
57
  editor_state = gr.State()
58
 
59
 
60
- @spaces.GPU
61
- def set_pipe(image_editor, input_image, description_prompt, edit_guidance_scale, num_inference_steps=4,
62
- num_inversion_steps=4, inversion_max_step=0.6, rnri_iterations=2, rnri_alpha=0.1, rnri_lr=0.2):
63
-
64
  if device == 'cuda':
65
- if image_editor is not None:
66
- image_editor = image_editor.to('cpu')
67
 
68
  torch.cuda.empty_cache()
69
-
70
  if input_image is None or not description_prompt:
71
  return None, "Please set all inputs."
72
 
73
- print('### description_prompt ', description_prompt)
74
- print('### input_image.size ', input_image.size)
75
-
76
- if isinstance(num_inference_steps, str): num_inference_steps = int(num_inference_steps)
77
- if isinstance(num_inversion_steps, str): num_inversion_steps = int(num_inversion_steps)
78
- if isinstance(edit_guidance_scale, str): edit_guidance_scale = float(edit_guidance_scale)
79
- if isinstance(inversion_max_step, str): inversion_max_step = float(inversion_max_step)
80
- if isinstance(rnri_iterations, str): rnri_iterations = int(rnri_iterations)
81
  if isinstance(rnri_alpha, str): rnri_alpha = float(rnri_alpha)
82
- if isinstance(rnri_lr, str): rnri_lr = float(rnri_lr)
83
-
84
-
85
  config = RunConfig(num_inference_steps=num_inference_steps,
86
  num_inversion_steps=num_inversion_steps,
87
  edit_guidance_scale=edit_guidance_scale,
@@ -92,8 +89,37 @@ with gr.Blocks(css="style.css") as demo:
92
  return image_editor, "Input has set!"
93
 
94
 
95
- @spaces.GPU
96
- def edit(editor, target_prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  if editor is None:
98
  raise gr.Error("Set inputs before editing.")
99
  # if device == "cuda":
@@ -102,6 +128,19 @@ with gr.Blocks(css="style.css") as demo:
102
  image = editor.edit(target_prompt)
103
  return image
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  with gr.Row():
106
  with gr.Column(elem_id="col-container-1"):
107
  with gr.Row():
@@ -165,7 +204,6 @@ with gr.Blocks(css="style.css") as demo:
165
  )
166
 
167
  with gr.Row():
168
- # set_.button = gr.Button("Set input image & description & settings", scale=1)
169
  is_set_text = gr.Text("", show_label=False)
170
 
171
  with gr.Column(elem_id="col-container-2"):
@@ -191,6 +229,8 @@ with gr.Blocks(css="style.css") as demo:
191
  inversion_max_step, rnri_iterations, rnri_alpha, rnri_lr],
192
  )
193
 
 
 
194
  input_image.change(set_pipe,
195
  inputs=[editor_state, input_image, description_prompt, edit_guidance_scale, num_inference_steps,
196
  num_inference_steps, inversion_max_step, rnri_iterations, rnri_alpha, rnri_lr],
 
35
 
36
  scheduler_class = MyEulerAncestralDiscreteScheduler
37
 
38
+ pipe_inversion = SDXLDDIMPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True).to(device)
39
+ pipe_inference = AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo",
40
+ use_safetensors=True).to(device)
41
  pipe_inference.scheduler = scheduler_class.from_config(pipe_inference.scheduler.config)
42
  pipe_inversion.scheduler = scheduler_class.from_config(pipe_inversion.scheduler.config)
43
  pipe_inversion.scheduler_inference = scheduler_class.from_config(pipe_inference.scheduler.config)
 
58
  editor_state = gr.State()
59
 
60
 
61
+ # @spaces.GPU
62
+ def set_pipe1(image_editor, input_image, description_prompt, edit_guidance_scale, num_inference_steps=4,
63
+ num_inversion_steps=4, inversion_max_step=0.6, rnri_iterations=2, rnri_alpha=0.1, rnri_lr=0.2):
64
+
65
  if device == 'cuda':
66
+ # if image_editor is not None:
67
+ # image_editor = image_editor.to('cpu')
68
 
69
  torch.cuda.empty_cache()
70
+
71
  if input_image is None or not description_prompt:
72
  return None, "Please set all inputs."
73
 
74
+ if isinstance(num_inference_steps, str): num_inference_steps = int(num_inference_steps)
75
+ if isinstance(num_inversion_steps, str): num_inversion_steps = int(num_inversion_steps)
76
+ if isinstance(edit_guidance_scale, str): edit_guidance_scale = float(edit_guidance_scale)
77
+ if isinstance(inversion_max_step, str): inversion_max_step = float(inversion_max_step)
78
+ if isinstance(rnri_iterations, str): rnri_iterations = int(rnri_iterations)
 
 
 
79
  if isinstance(rnri_alpha, str): rnri_alpha = float(rnri_alpha)
80
+ if isinstance(rnri_lr, str): rnri_lr = float(rnri_lr)
81
+
 
82
  config = RunConfig(num_inference_steps=num_inference_steps,
83
  num_inversion_steps=num_inversion_steps,
84
  edit_guidance_scale=edit_guidance_scale,
 
89
  return image_editor, "Input has set!"
90
 
91
 
92
+ # @spaces.GPU
93
+ def set_pipe(inversion_state, input_image, description_prompt, edit_guidance_scale, num_inference_steps=4,
94
+ num_inversion_steps=4, inversion_max_step=0.6, rnri_iterations=2, rnri_alpha=0.1, rnri_lr=0.2):
95
+ if device == 'cuda':
96
+ # if image_editor is not None:
97
+ # image_editor = image_editor.to('cpu')
98
+
99
+ torch.cuda.empty_cache()
100
+
101
+ if input_image is None or not description_prompt:
102
+ return None, "Please set all inputs."
103
+
104
+ if isinstance(num_inference_steps, str): num_inference_steps = int(num_inference_steps)
105
+ if isinstance(num_inversion_steps, str): num_inversion_steps = int(num_inversion_steps)
106
+ if isinstance(edit_guidance_scale, str): edit_guidance_scale = float(edit_guidance_scale)
107
+ if isinstance(inversion_max_step, str): inversion_max_step = float(inversion_max_step)
108
+ if isinstance(rnri_iterations, str): rnri_iterations = int(rnri_iterations)
109
+ if isinstance(rnri_alpha, str): rnri_alpha = float(rnri_alpha)
110
+ if isinstance(rnri_lr, str): rnri_lr = float(rnri_lr)
111
+
112
+ config = RunConfig(num_inference_steps=num_inference_steps,
113
+ num_inversion_steps=num_inversion_steps,
114
+ edit_guidance_scale=edit_guidance_scale,
115
+ inversion_max_step=inversion_max_step)
116
+ inversion_state = ImageEditorDemo.invert(pipe_inversion, input_image, description_prompt, config,
117
+ [rnri_iterations, rnri_alpha, rnri_lr], device)
118
+ return inversion_state, "Input has set!"
119
+
120
+
121
+ # @spaces.GPU
122
+ def edit1(editor, target_prompt):
123
  if editor is None:
124
  raise gr.Error("Set inputs before editing.")
125
  # if device == "cuda":
 
128
  image = editor.edit(target_prompt)
129
  return image
130
 
131
+
132
+ # @spaces.GPU
133
+ def edit(inversion_state, target_prompt):
134
+ if inversion_state is None:
135
+ raise gr.Error("Set inputs before editing.")
136
+ # if device == "cuda":
137
+ # image = editor.to(device).edit(target_prompt)
138
+ # else:
139
+ image = ImageEditorDemo.edit(pipe_inference, target_prompt, inversion_state['latent'], inversion_state['noise'],
140
+ inversion_state['cfg'], inversion_state['cfg'].edit_guidance_scale)
141
+ return image
142
+
143
+
144
  with gr.Row():
145
  with gr.Column(elem_id="col-container-1"):
146
  with gr.Row():
 
204
  )
205
 
206
  with gr.Row():
 
207
  is_set_text = gr.Text("", show_label=False)
208
 
209
  with gr.Column(elem_id="col-container-2"):
 
229
  inversion_max_step, rnri_iterations, rnri_alpha, rnri_lr],
230
  )
231
 
232
+ gr.Markdown(f"""Disclaimer: Performance may be inferior to the reported in the paper due to hardware limitation.
233
+ """)
234
  input_image.change(set_pipe,
235
  inputs=[editor_state, input_image, description_prompt, edit_guidance_scale, num_inference_steps,
236
  num_inference_steps, inversion_max_step, rnri_iterations, rnri_alpha, rnri_lr],
src/editor.py CHANGED
@@ -11,9 +11,11 @@ from diffusers.utils.torch_utils import randn_tensor
11
  def inversion_callback(pipe, step, timestep, callback_kwargs):
12
  return callback_kwargs
13
 
 
14
  def inference_callback(pipe, step, timestep, callback_kwargs):
15
  return callback_kwargs
16
 
 
17
  def center_crop(im):
18
  width, height = im.size # Get dimensions
19
  min_dim = min(width, height)
@@ -36,67 +38,62 @@ def load_im_into_format_from_path(im_path):
36
 
37
  class ImageEditorDemo:
38
  def __init__(self, pipe_inversion, pipe_inference, input_image, description_prompt, cfg, device, inv_hp):
39
- self.pipe_inversion = pipe_inversion
40
- self.pipe_inference = pipe_inference
41
  self.original_image = load_im_into_format_from_path(input_image).convert("RGB")
42
- self.load_image = True
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  g_cpu = torch.Generator().manual_seed(7865)
44
- img_size = (512,512)
45
  VQAE_SCALE = 8
46
  latents_size = (1, 4, img_size[0] // VQAE_SCALE, img_size[1] // VQAE_SCALE)
47
  noise = [randn_tensor(latents_size, dtype=torch.float16, device=torch.device(device), generator=g_cpu) for i
48
  in range(cfg.num_inversion_steps)]
 
49
  pipe_inversion.scheduler.set_noise_list(noise)
50
  pipe_inversion.scheduler_inference.set_noise_list(noise)
51
  pipe_inversion.set_progress_bar_config(disable=True)
52
- self.cfg = cfg
53
- self.pipe_inversion.cfg = cfg
54
- self.pipe_inference.cfg = cfg
55
- self.inv_hp = inv_hp # [2, 0.1, 0.2]
56
- self.edit_cfg = cfg.edit_guidance_scale
57
-
58
- self.pipe_inversion = self.pipe_inversion.to(device)
59
- self.last_latent = self.invert(self.original_image, description_prompt)
60
- self.original_latent = self.last_latent
61
-
62
- # if device == 'cuda':
63
- # after the inversion, we can move the inversion model to the CPU
64
- self.pipe_inversion = self.pipe_inversion.to('cpu')
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  pipe_inference.scheduler.set_noise_list(noise)
67
  pipe_inference.set_progress_bar_config(disable=True)
68
- self.pipe_inference = self.pipe_inference.to(device)
69
-
70
- def invert(self, init_image, base_prompt):
71
- res = self.pipe_inversion(prompt=base_prompt,
72
- num_inversion_steps=self.cfg.num_inversion_steps,
73
- num_inference_steps=self.cfg.num_inference_steps,
74
- image=init_image,
75
- guidance_scale=self.cfg.inversion_guidance_scale,
76
- callback_on_step_end=inversion_callback,
77
- strength=self.cfg.inversion_max_step,
78
- denoising_start=1.0 - self.cfg.inversion_max_step,
79
- inv_hp=self.inv_hp)[0][0]
80
- return res
81
-
82
- def edit(self, target_prompt):
83
- image = self.pipe_inference(prompt=target_prompt,
84
- num_inference_steps=self.cfg.num_inference_steps,
85
- negative_prompt="",
86
- callback_on_step_end=inference_callback,
87
- image=self.last_latent,
88
- strength=self.cfg.inversion_max_step,
89
- denoising_start=1.0 - self.cfg.inversion_max_step,
90
- guidance_scale=self.edit_cfg).images[0]
91
  return image
92
 
93
- def to(self, device):
94
- self.pipe_inference = self.pipe_inference.to(device)
95
- self.pipe_inversion = self.pipe_inversion.to(device)
96
- self.last_latent = self.last_latent.to(device)
97
- self.original_latent = self.original_latent.to(device)
98
-
99
- self.pipe_inversion.scheduler.set_noise_list_device(device)
100
- self.pipe_inference.scheduler.set_noise_list_device(device)
101
- self.pipe_inversion.scheduler_inference.set_noise_list_device(device)
102
- return self
 
11
  def inversion_callback(pipe, step, timestep, callback_kwargs):
12
  return callback_kwargs
13
 
14
+
15
  def inference_callback(pipe, step, timestep, callback_kwargs):
16
  return callback_kwargs
17
 
18
+
19
  def center_crop(im):
20
  width, height = im.size # Get dimensions
21
  min_dim = min(width, height)
 
38
 
39
  class ImageEditorDemo:
40
  def __init__(self, pipe_inversion, pipe_inference, input_image, description_prompt, cfg, device, inv_hp):
 
 
41
  self.original_image = load_im_into_format_from_path(input_image).convert("RGB")
42
+
43
+
44
+ # self.pipe_inversion = self.pipe_inversion.to(device)
45
+ # self.last_latent = self.invert(pipe_inversion, self.original_image, description_prompt)
46
+
47
+ # if device == 'cuda':
48
+ # after the inversion, we can move the inversion model to the CPU
49
+ # self.pipe_inversion = self.pipe_inversion.to('cpu')
50
+
51
+ # self.pipe_inference = self.pipe_inference.to(device)
52
+
53
+ @staticmethod
54
+ def invert(pipe_inversion, init_image, base_prompt, cfg, inv_hp, device):
55
+ init_image = load_im_into_format_from_path(init_image).convert("RGB")
56
  g_cpu = torch.Generator().manual_seed(7865)
57
+ img_size = (512, 512)
58
  VQAE_SCALE = 8
59
  latents_size = (1, 4, img_size[0] // VQAE_SCALE, img_size[1] // VQAE_SCALE)
60
  noise = [randn_tensor(latents_size, dtype=torch.float16, device=torch.device(device), generator=g_cpu) for i
61
  in range(cfg.num_inversion_steps)]
62
+ pipe_inversion.cfg = cfg
63
  pipe_inversion.scheduler.set_noise_list(noise)
64
  pipe_inversion.scheduler_inference.set_noise_list(noise)
65
  pipe_inversion.set_progress_bar_config(disable=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ res = pipe_inversion(prompt=base_prompt,
68
+ num_inversion_steps=cfg.num_inversion_steps,
69
+ num_inference_steps=cfg.num_inference_steps,
70
+ image=init_image,
71
+ guidance_scale=cfg.inversion_guidance_scale,
72
+ strength=cfg.inversion_max_step,
73
+ denoising_start=1.0 - cfg.inversion_max_step,
74
+ inv_hp=inv_hp)[0][0]
75
+ return {"latent": res, "noise": noise, "cfg": cfg}
76
+
77
+ @staticmethod
78
+ def edit(pipe_inference, target_prompt, last_latent, noise, cfg, edit_cfg):
79
+ pipe_inference.cfg = cfg
80
  pipe_inference.scheduler.set_noise_list(noise)
81
  pipe_inference.set_progress_bar_config(disable=True)
82
+ image = pipe_inference(prompt=target_prompt,
83
+ num_inference_steps=cfg.num_inference_steps,
84
+ negative_prompt="",
85
+ image=last_latent,
86
+ strength=cfg.inversion_max_step,
87
+ denoising_start=1.0 - cfg.inversion_max_step,
88
+ guidance_scale=edit_cfg).images[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return image
90
 
91
+ # def to(self, device):
92
+ # self.pipe_inference = self.pipe_inference.to(device)
93
+ # self.pipe_inversion = self.pipe_inversion.to(device)
94
+ # self.last_latent = self.last_latent.to(device)
95
+ #
96
+ # self.pipe_inversion.scheduler.set_noise_list_device(device)
97
+ # self.pipe_inference.scheduler.set_noise_list_device(device)
98
+ # self.pipe_inversion.scheduler_inference.set_noise_list_device(device)
99
+ # return self