jixin0101 committed
Commit 18bc62a · 1 parent: a350cc0

Add Adaptive Strength Diffusion
Files changed (2):
  1. app.py (+2 −11)
  2. pipeline_objectclear.py (+13 −2)
app.py CHANGED

@@ -186,7 +186,7 @@ pipe = ObjectClearPipeline.from_pretrained_with_custom_modules(
 pipe.to(device)
 
 @spaces.GPU
-def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps, strength
+def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps
 ):
     generator = torch.Generator(device="cuda").manual_seed(seed)
     image_np = image_state["origin_image"]
@@ -219,7 +219,6 @@ def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed,
         mask_image=mask,
         generator=generator,
         num_inference_steps=num_inference_steps,
-        strength=strength,
         guidance_scale=guidance_scale,
         height=h,
         width=w,
@@ -432,13 +431,6 @@ with gr.Blocks(css=custom_css) as demo:
         )
 
         with gr.Accordion('ObjectClear Settings', open=True):
-            strength = gr.Radio(
-                choices=[0.99, 1.0],
-                value=0.99,
-                label="Strength",
-                info="0.99 better preserves the background and color; use 1.0 if object/shadow is not fully removed (default: 0.99)"
-            )
-
             guidance_scale = gr.Slider(
                 minimum=1, maximum=10, step=0.5, value=2.5,
                 label="Guidance Scale",
@@ -517,8 +509,7 @@
             mask_dropdown,
             guidance_scale,
             seed,
-            num_inference_steps,
-            strength
+            num_inference_steps
         ],
         outputs=[
             output_image_component, output_compare_image_component
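Note: the removed Strength radio existed because `strength` controls how much of the denoising schedule is actually run, which is why 0.99 preserved more of the background and color while 1.0 removed objects and shadows more completely. The sketch below illustrates that mapping as it is conventionally implemented in diffusers-style inpainting pipelines; the helper name is made up for illustration and is not ObjectClear's own code.

# Illustrative only: how strength typically maps to the number of denoising
# steps in diffusers-style inpainting pipelines (an assumption about the
# convention ObjectClear follows, not the pipeline's actual code).
def steps_for_strength(num_inference_steps: int, strength: float) -> int:
    init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
    t_start = max(num_inference_steps - init_timestep, 0)
    return num_inference_steps - t_start  # timesteps actually denoised

print(steps_for_strength(50, 0.99))  # 49: starts from a lightly noised image, keeps more background/color
print(steps_for_strength(50, 1.0))   # 50: starts from pure noise, removes objects/shadows more aggressively

With the radio gone, the demo always runs the full schedule (strength 1.0) and instead relies on the adaptive blending added in pipeline_objectclear.py below to preserve the background.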
pipeline_objectclear.py CHANGED

@@ -1352,7 +1352,7 @@ class ObjectClearPipeline(
         height: Optional[int] = None,
         width: Optional[int] = None,
         padding_mask_crop: Optional[int] = None,
-        strength: float = 0.9999,
+        strength: float = 1.0,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
         sigmas: List[float] = None,
@@ -1426,7 +1426,7 @@ class ObjectClearPipeline(
                 on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
                 resizing to the original image size for inpainting. This is useful when the masked area is small while
                 the image is large and contain information irrelevant for inpainting, such as background.
-            strength (`float`, *optional*, defaults to 0.9999):
+            strength (`float`, *optional*, defaults to 1.0):
                 Conceptually, indicates how much to transform the masked portion of the reference `image`. Must be
                 between 0 and 1. `image` will be used as a starting point, adding more noise to it the larger the
                 `strength`. The number of denoising steps depends on the amount of noise initially added. When
@@ -1914,6 +1914,17 @@ class ObjectClearPipeline(
                 # progressive attention mask blending
                 fuse_index = 5
                 if self.config.apply_attention_guided_fusion:
+                    if i == 0:
+                        init_latents_proper = image_latents
+                        init_mask = mask[0:1]
+
+                        noise_timestep = timesteps[i + 1]
+                        init_latents_proper = self.scheduler.add_noise(
+                            init_latents_proper, noise, torch.tensor([noise_timestep])
+                        )
+
+                        latents = (1 - init_mask) * init_latents_proper + init_mask * latents
+
                     if i == len(timesteps) - 1:
                         attn_key, attn_map = next(iter(self.cross_attention_scores.items()))
                         attn_map = self.resize_attn_map_divide2(attn_map, mask, fuse_index)
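The added `if i == 0:` branch is the adaptive-strength part of this commit: with `strength` now defaulting to 1.0, the background is no longer preserved by shortening the schedule; instead, after the first denoising step the latents outside the mask are reset to a noised copy of the input-image latents. Below is a toy, self-contained sketch of that blend (tensor shapes, the fixed 0.9/0.1 noising weights, and variable values are illustrative stand-ins, not the pipeline's own).

import torch

# Stand-ins for the tensors used in the added branch.
image_latents = torch.zeros(1, 4, 8, 8)      # encoded input image (toy values)
latents = torch.randn(1, 4, 8, 8)            # current model latents after step 0
noise = torch.randn_like(image_latents)

init_mask = torch.zeros(1, 1, 8, 8)
init_mask[..., 2:6, 2:6] = 1.0               # 1 = region to erase, 0 = background

# Stand-in for scheduler.add_noise(image_latents, noise, timesteps[i + 1]);
# a real scheduler weights the two terms by its alphas/sigmas for that timestep.
init_latents_proper = 0.9 * image_latents + 0.1 * noise

# The blend from the diff: background follows the (noised) image latents,
# the masked region keeps the model's prediction.
blended = (1 - init_mask) * init_latents_proper + init_mask * latents

assert torch.allclose(blended[..., 0, 0], init_latents_proper[..., 0, 0])  # background follows the image
assert torch.allclose(blended[..., 4, 4], latents[..., 4, 4])              # masked region keeps the prediction

This resembles the per-step background blending that standard diffusers inpainting pipelines apply when the UNet takes plain 4-channel latents, except that here it runs only once, at i == 0, so later steps and the attention-guided fusion still operate on the model's own latents.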