Add Adaptive Strength Diffusion

Files changed:
- app.py (+2, -11)
- pipeline_objectclear.py (+13, -2)
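In short: this commit removes the user-facing Strength radio from the Gradio demo and raises the pipeline's `strength` default to 1.0. In its place, a first-step latent blend is added to the attention-guided-fusion branch, re-anchoring the unmasked region to the (re-noised) input image; this appears to recover the background and color preservation that `strength=0.99` previously provided, while now sampling at full strength.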
 
    	
app.py
CHANGED

@@ -186,7 +186,7 @@ pipe = ObjectClearPipeline.from_pretrained_with_custom_modules(
 pipe.to(device)
 
 @spaces.GPU
-def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps, strength
+def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps
             ):
     generator = torch.Generator(device="cuda").manual_seed(seed)
     image_np = image_state["origin_image"]

@@ -219,7 +219,6 @@ def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed,
         mask_image=mask,
         generator=generator,
         num_inference_steps=num_inference_steps,
-        strength=strength,
         guidance_scale=guidance_scale,
         height=h,
         width=w,

@@ -432,13 +431,6 @@ with gr.Blocks(css=custom_css) as demo:
             )
 
             with gr.Accordion('ObjectClear Settings', open=True):
-                strength = gr.Radio(
-                    choices=[0.99, 1.0],
-                    value=0.99,
-                    label="Strength",
-                    info="0.99 better preserves the background and color; use 1.0 if object/shadow is not fully removed (default: 0.99)"
-                )
-
                 guidance_scale = gr.Slider(
                     minimum=1, maximum=10, step=0.5, value=2.5,
                     label="Guidance Scale",

@@ -517,8 +509,7 @@ with gr.Blocks(css=custom_css) as demo:
             mask_dropdown,
             guidance_scale,
             seed,
-            num_inference_steps,
-            strength
+            num_inference_steps
         ],
         outputs=[
             output_image_component, output_compare_image_component
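Note that with the radio removed, `strength` is no longer threaded through the event handler: it leaves the `process(...)` signature, the `pipe(...)` call, and the Gradio `inputs` list, so the pipeline-side default (changed to 1.0 below) now applies.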
    	
pipeline_objectclear.py
CHANGED

@@ -1352,7 +1352,7 @@ class ObjectClearPipeline(
         height: Optional[int] = None,
         width: Optional[int] = None,
         padding_mask_crop: Optional[int] = None,
-        strength: float = 0.99,
+        strength: float = 1.0,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
         sigmas: List[float] = None,

@@ -1426,7 +1426,7 @@ class ObjectClearPipeline(
                 on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
                 resizing to the original image size for inpainting. This is useful when the masked area is small while
                 the image is large and contain information irrelevant for inpainting, such as background.
-            strength (`float`, *optional*, defaults to 0.99):
+            strength (`float`, *optional*, defaults to 1.0):
                 Conceptually, indicates how much to transform the masked portion of the reference `image`. Must be
                 between 0 and 1. `image` will be used as a starting point, adding more noise to it the larger the
                 `strength`. The number of denoising steps depends on the amount of noise initially added. When

@@ -1914,6 +1914,17 @@ class ObjectClearPipeline(
                 # progressive attention mask blending
                 fuse_index = 5
                 if self.config.apply_attention_guided_fusion:
+                    if i == 0:
+                        init_latents_proper = image_latents
+                        init_mask = mask[0:1]
+
+                        noise_timestep = timesteps[i + 1]
+                        init_latents_proper = self.scheduler.add_noise(
+                            init_latents_proper, noise, torch.tensor([noise_timestep])
+                        )
+
+                        latents = (1 - init_mask) * init_latents_proper + init_mask * latents
+
                     if i == len(timesteps) - 1:
                         attn_key, attn_map = next(iter(self.cross_attention_scores.items()))
                         attn_map = self.resize_attn_map_divide2(attn_map, mask, fuse_index)
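For readers, a minimal sketch (not part of the commit) of what the new `if i == 0:` block does: at the first denoising step, the latents of the input image are re-noised to the next timestep and composited into the working latents everywhere outside the mask, so the background stays pinned to the source image while only the masked region is synthesized. This is the same per-step blend convention diffusers' StableDiffusionInpaintPipeline uses; the scheduler choice and tensor shapes below are stand-ins, not the pipeline's real configuration.

import torch
from diffusers import DDPMScheduler  # stand-in; the pipeline uses its own configured scheduler

scheduler = DDPMScheduler(num_train_timesteps=1000)
scheduler.set_timesteps(50)
timesteps = scheduler.timesteps

# Toy tensors (shapes are illustrative only).
image_latents = torch.randn(1, 4, 8, 8)  # encoded input image
latents = torch.randn(1, 4, 8, 8)        # working latents at step i
noise = torch.randn_like(image_latents)  # noise tensor shared with sampling init
mask = torch.zeros(1, 1, 8, 8)
mask[..., 2:6, 2:6] = 1.0                # 1 = region to inpaint, 0 = background to keep

i = 0  # the commit applies the blend only on the first step
noise_timestep = timesteps[i + 1]        # next (less noisy) timestep, as in the diff
init_latents_proper = scheduler.add_noise(
    image_latents, noise, torch.tensor([noise_timestep])
)

# Outside the mask, keep the re-noised original image; inside, keep the
# denoiser's working latents.
latents = (1 - mask) * init_latents_proper + mask * latents

Noising to `timesteps[i + 1]` rather than `timesteps[i]` matches the noise level the working latents carry after step `i`, so both terms of the blend sit at the same point on the noise schedule.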