Add Adaptive Strength Diffusion
Files changed:
- app.py: +2 -11
- pipeline_objectclear.py: +13 -2
app.py CHANGED

@@ -186,7 +186,7 @@ pipe = ObjectClearPipeline.from_pretrained_with_custom_modules(
 pipe.to(device)
 
 @spaces.GPU
-def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps, strength
+def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed, num_inference_steps
 ):
     generator = torch.Generator(device="cuda").manual_seed(seed)
     image_np = image_state["origin_image"]
@@ -219,7 +219,6 @@ def process(image_state, interactive_state, mask_dropdown, guidance_scale, seed,
         mask_image=mask,
         generator=generator,
         num_inference_steps=num_inference_steps,
-        strength=strength,
         guidance_scale=guidance_scale,
         height=h,
         width=w,
@@ -432,13 +431,6 @@ with gr.Blocks(css=custom_css) as demo:
             )
 
             with gr.Accordion('ObjectClear Settings', open=True):
-                strength = gr.Radio(
-                    choices=[0.99, 1.0],
-                    value=0.99,
-                    label="Strength",
-                    info="0.99 better preserves the background and color; use 1.0 if object/shadow is not fully removed (default: 0.99)"
-                )
-
                 guidance_scale = gr.Slider(
                     minimum=1, maximum=10, step=0.5, value=2.5,
                     label="Guidance Scale",
@@ -517,8 +509,7 @@ with gr.Blocks(css=custom_css) as demo:
             mask_dropdown,
             guidance_scale,
             seed,
-            num_inference_steps,
-            strength
+            num_inference_steps
         ],
         outputs=[
            output_image_component, output_compare_image_component
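For readers less familiar with Gradio, the reason the `process` signature, the `pipe(...)` call, and the `inputs=[...]` list all change together is that a click handler receives exactly the components listed in `inputs`, in order. Below is a minimal, self-contained sketch of that wiring pattern; it is not the actual ObjectClear demo, and the dummy handler plus the seed/steps widgets are illustrative assumptions.

import gradio as gr

# Toy stand-in for the pattern in app.py: dropping the strength Radio also means
# dropping the matching parameter from the handler signature and the inputs list.
def process(guidance_scale, seed, num_inference_steps):
    return f"guidance={guidance_scale}, seed={int(seed)}, steps={int(num_inference_steps)}"

with gr.Blocks() as demo:
    with gr.Accordion("Settings", open=True):
        guidance_scale = gr.Slider(minimum=1, maximum=10, step=0.5, value=2.5, label="Guidance Scale")
        seed = gr.Number(value=42, label="Seed", precision=0)
        num_inference_steps = gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Steps")
    run = gr.Button("Run")
    out = gr.Textbox(label="Output")
    run.click(process, inputs=[guidance_scale, seed, num_inference_steps], outputs=[out])

if __name__ == "__main__":
    demo.launch()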
pipeline_objectclear.py CHANGED

@@ -1352,7 +1352,7 @@ class ObjectClearPipeline(
         height: Optional[int] = None,
         width: Optional[int] = None,
         padding_mask_crop: Optional[int] = None,
-        strength: float = 0.99,
+        strength: float = 1.0,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
         sigmas: List[float] = None,
@@ -1426,7 +1426,7 @@ class ObjectClearPipeline(
                 on `padding_mask_crop`. The image and mask_image will then be cropped based on the expanded area before
                 resizing to the original image size for inpainting. This is useful when the masked area is small while
                 the image is large and contain information irrelevant for inpainting, such as background.
-            strength (`float`, *optional*, defaults to 0.99):
+            strength (`float`, *optional*, defaults to 1.0):
                 Conceptually, indicates how much to transform the masked portion of the reference `image`. Must be
                 between 0 and 1. `image` will be used as a starting point, adding more noise to it the larger the
                 `strength`. The number of denoising steps depends on the amount of noise initially added. When
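As the docstring notes, `strength` controls how much of the schedule actually runs: the input image is noised in proportion to `strength` and only the remaining steps are denoised. A minimal sketch of that bookkeeping, modeled on the usual diffusers-style `get_timesteps` helper (the exact computation inside `ObjectClearPipeline` may differ slightly):

# How many denoising steps run for a given strength, assuming the common
# diffusers convention of trimming the front of the timestep schedule.
def effective_steps(num_inference_steps: int, strength: float) -> int:
    init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
    t_start = max(num_inference_steps - init_timestep, 0)
    return num_inference_steps - t_start

print(effective_steps(50, 0.99))  # 49 -> the old app default kept a trace of the input image
print(effective_steps(50, 1.0))   # 50 -> the new default fully re-noises the masked image

With the new default of 1.0 the full schedule runs, so the starting noise no longer preserves the input image; keeping the background intact is instead handled by the adaptive blending added in the next hunk.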
@@ -1914,6 +1914,17 @@ class ObjectClearPipeline(
                 # progressive attention mask blending
                 fuse_index = 5
                 if self.config.apply_attention_guided_fusion:
+                    if i == 0:
+                        init_latents_proper = image_latents
+                        init_mask = mask[0:1]
+
+                        noise_timestep = timesteps[i + 1]
+                        init_latents_proper = self.scheduler.add_noise(
+                            init_latents_proper, noise, torch.tensor([noise_timestep])
+                        )
+
+                        latents = (1 - init_mask) * init_latents_proper + init_mask * latents
+
                     if i == len(timesteps) - 1:
                         attn_key, attn_map = next(iter(self.cross_attention_scores.items()))
                         attn_map = self.resize_attn_map_divide2(attn_map, mask, fuse_index)
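The new `if i == 0:` branch re-noises the clean image latents to the level of the next timestep and copies them back everywhere outside the mask, so the background stays tied to the input even with `strength=1.0`, while the masked region keeps the freshly denoised latents. A standalone sketch of that blend, with the scheduler's `add_noise` replaced by a hand-rolled DDPM-style mix and all shapes and the cumulative alpha chosen purely for illustration:

import torch

# Illustrative shapes; in the pipeline these come from the VAE and the scheduler.
latents = torch.randn(1, 4, 64, 64)              # current denoised latents
image_latents = torch.randn(1, 4, 64, 64)        # latents of the clean input image
noise = torch.randn_like(image_latents)
mask = (torch.rand(1, 1, 64, 64) > 0.5).float()  # 1 = region being repainted

# Stand-in for self.scheduler.add_noise(image_latents, noise, t): mix the clean
# latents with noise at a made-up cumulative alpha for the next timestep.
alpha_cumprod_t = 0.98
init_latents_proper = alpha_cumprod_t ** 0.5 * image_latents + (1 - alpha_cumprod_t) ** 0.5 * noise
init_mask = mask[0:1]

# Outside the mask (init_mask == 0): keep the re-noised input image latents.
# Inside the mask (init_mask == 1): keep the denoised latents from this step.
latents = (1 - init_mask) * init_latents_proper + init_mask * latents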