Spaces:

afmck
/

stable-diffusion-inpainting-segmentation

Runtime error

App Files Files Community

Alexander McKinney committed on Nov 14, 2022

Commit

d16d053

1 Parent(s): 8cd1abb

adds comments to code

Browse files

Files changed (1) hide show

app.py +23 -14

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ from diffusers import StableDiffusionInpaintPipeline
 # NOTE(review): called bare like this, each line only constructs a
 # context-manager object and immediately discards it; the gradient mode is
 # changed on `__enter__`, i.e. when used in a `with` block or as a decorator.
 # As written, neither call affects the code that follows.
 torch.inference_mode()
 torch.no_grad()
 def load_segmentation_models(model_name: str = 'facebook/detr-resnet-50-panoptic'):
     feature_extractor = DetrFeatureExtractor.from_pretrained(model_name)
     model = DetrForSegmentation.from_pretrained(model_name)
@@ -24,6 +25,7 @@ def load_segmentation_models(model_name: str = 'facebook/detr-resnet-50-panoptic
     return feature_extractor, model, cfg
 def load_diffusion_pipeline(model_name: str = 'runwayml/stable-diffusion-inpainting'):
     return StableDiffusionInpaintPipeline.from_pretrained(
         model_name,
@@ -31,6 +33,7 @@ def load_diffusion_pipeline(model_name: str = 'runwayml/stable-diffusion-inpaint
         torch_dtype=torch.float16
     )
 def get_device(try_cuda: bool = True) -> torch.device:
     """Return the torch device to use.

     Gives the ``'cuda'`` device when ``try_cuda`` is true and
     ``torch.cuda.is_available()`` reports CUDA; otherwise the ``'cpu'`` device.
     """
     return torch.device('cuda' if try_cuda and torch.cuda.is_available() else 'cpu')
@@ -42,6 +45,7 @@ def max_pool(x: torch.Tensor, kernel_size: int):
     pad_size = (kernel_size - 1) // 2
     return torch.nn.functional.max_pool2d(x, kernel_size, (1, 1), padding=pad_size)
 def clean_mask(mask, max_kernel: int = 23, min_kernel: int = 5):
     mask = torch.Tensor(mask[None, None]).float()
     mask = min_pool(mask, min_kernel)
@@ -49,13 +53,14 @@ def clean_mask(mask, max_kernel: int = 23, min_kernel: int = 5):
     mask = mask.bool().squeeze().numpy()
     return mask
-device = get_device()
 feature_extractor, segmentation_model, segmentation_cfg = load_segmentation_models()
 pipe = load_diffusion_pipeline()
 pipe = pipe.to(device)
 def fn_segmentation(image, max_kernel, min_kernel):
     inputs = feature_extractor(images=image, return_tensors="pt")
     outputs = segmentation_model(**inputs)
@@ -81,17 +86,7 @@ def fn_segmentation(image, max_kernel, min_kernel):
     return raw_masks, checkbox_group, gr.Image.update(value=np.zeros((image.height, image.width))), gr.Image.update(value=image)
-def fn_clean(masks, max_kernel, min_kernel):
-    out = []
-    for m in masks:
-        m = torch.FloatTensor(m)[None, None]
-        m = min_pool(m, min_kernel)
-        m = max_pool(m, max_kernel)
-        m = m.squeeze().numpy().astype(np.uint8)
-        out.append(m)
-    return out
 def fn_update_mask(
         image: Image,
         masks: List[np.array],
@@ -108,6 +103,7 @@ def fn_update_mask(
     return combined_mask.astype(np.uint8) * 255, Image.fromarray(masked_image)
 def fn_diffusion(
         prompt: str,
         masked_image: Image,
@@ -118,6 +114,9 @@ def fn_diffusion(
     ):
     if len(negative_prompt) == 0:
         negative_prompt = None
     STABLE_DIFFUSION_SMALL_EDGE = 512
     w, h = masked_image.size
@@ -133,6 +132,7 @@ def fn_diffusion(
     mask = Image.fromarray(mask).convert("RGB").resize((new_width, new_height))
     masked_image = masked_image.convert("RGB").resize((new_width, new_height))
     inpainted_image = pipe(
         height=new_height,
         width=new_width,
@@ -144,6 +144,7 @@ def fn_diffusion(
         negative_prompt=negative_prompt
     ).images[0]
     inpainted_image = inpainted_image.resize((w, h))
     return inpainted_image
@@ -151,21 +152,24 @@ def fn_diffusion(
 demo = gr.Blocks()
 with demo:
     input_image = gr.Image(value="http://images.cocodataset.org/val2017/000000039769.jpg", type='pil', label="Input Image")
     bt_masks = gr.Button("Compute Masks")
     with gr.Row():
         mask_image = gr.Image(type='numpy', label="Diffusion Mask")
         masked_image = gr.Image(type='pil', label="Masked Image")
     mask_storage = gr.State()
     with gr.Row():
         max_slider = gr.Slider(minimum=1, maximum=99, value=23, step=2, label="Mask Overflow")
         min_slider = gr.Slider(minimum=1, maximum=99, value=5, step=2, label="Mask Denoising")
         mask_checkboxes = gr.CheckboxGroup(interactive=True, label="Mask Selection")
     with gr.Row():
         with gr.Column():
             prompt = gr.Textbox("Two ginger cats lying together on a pink sofa. There are two TV remotes. High definition.", label="Prompt")
@@ -180,14 +184,19 @@ with demo:
     update_mask_inputs = [input_image, mask_storage, mask_checkboxes, max_slider, min_slider]
     update_mask_outputs = [mask_image, masked_image]
     input_image.change(lambda: gr.CheckboxGroup.update(choices=[], value=[]), outputs=mask_checkboxes)
     bt_masks.click(fn_segmentation, inputs=[input_image, max_slider, min_slider], outputs=[mask_storage, mask_checkboxes, mask_image, masked_image])
     max_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
     min_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
     mask_checkboxes.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
     bt_diffusion.click(fn_diffusion, inputs=[
         prompt,
         masked_image,

 # NOTE(review): called bare like this, each line only constructs a
 # context-manager object and immediately discards it; the gradient mode is
 # changed on `__enter__`, i.e. when used in a `with` block or as a decorator.
 # As written, neither call affects the code that follows.
 torch.inference_mode()
 torch.no_grad()
+# Load segmentation models
 def load_segmentation_models(model_name: str = 'facebook/detr-resnet-50-panoptic'):
     feature_extractor = DetrFeatureExtractor.from_pretrained(model_name)
     model = DetrForSegmentation.from_pretrained(model_name)
     return feature_extractor, model, cfg
+# Load diffusion pipeline
 def load_diffusion_pipeline(model_name: str = 'runwayml/stable-diffusion-inpainting'):
     return StableDiffusionInpaintPipeline.from_pretrained(
         model_name,
         torch_dtype=torch.float16
     )
+# Device helper
 def get_device(try_cuda: bool = True) -> torch.device:
     """Return the torch device to use.

     Gives the ``'cuda'`` device when ``try_cuda`` is true and
     ``torch.cuda.is_available()`` reports CUDA; otherwise the ``'cpu'`` device.
     """
     return torch.device('cuda' if try_cuda and torch.cuda.is_available() else 'cpu')
     pad_size = (kernel_size - 1) // 2
     return torch.nn.functional.max_pool2d(x, kernel_size, (1, 1), padding=pad_size)
+# Apply min-max pooling to clean up mask
 def clean_mask(mask, max_kernel: int = 23, min_kernel: int = 5):
     mask = torch.Tensor(mask[None, None]).float()
     mask = min_pool(mask, min_kernel)
     mask = mask.bool().squeeze().numpy()
     return mask
 feature_extractor, segmentation_model, segmentation_cfg = load_segmentation_models()
 pipe = load_diffusion_pipeline()
+device = get_device()
 pipe = pipe.to(device)
+# Callback function that runs segmentation and updates CheckboxGroup
 def fn_segmentation(image, max_kernel, min_kernel):
     inputs = feature_extractor(images=image, return_tensors="pt")
     outputs = segmentation_model(**inputs)
     return raw_masks, checkbox_group, gr.Image.update(value=np.zeros((image.height, image.width))), gr.Image.update(value=image)
+# Callback function that updates the displayed mask based on selected checkboxes
 def fn_update_mask(
         image: Image,
         masks: List[np.array],
     return combined_mask.astype(np.uint8) * 255, Image.fromarray(masked_image)
+# Callback function that runs diffusion given the current image, mask and prompt.
 def fn_diffusion(
         prompt: str,
         masked_image: Image,
     ):
     if len(negative_prompt) == 0:
         negative_prompt = None
+    # Resize image to a more stable diffusion friendly format.
+    # TODO: remove magic number
     STABLE_DIFFUSION_SMALL_EDGE = 512
     w, h = masked_image.size
     mask = Image.fromarray(mask).convert("RGB").resize((new_width, new_height))
     masked_image = masked_image.convert("RGB").resize((new_width, new_height))
+    # Run diffusion
     inpainted_image = pipe(
         height=new_height,
         width=new_width,
         negative_prompt=negative_prompt
     ).images[0]
+    # Resize back to the original size
     inpainted_image = inpainted_image.resize((w, h))
     return inpainted_image
 demo = gr.Blocks()
 with demo:
+    # Input image control
     input_image = gr.Image(value="http://images.cocodataset.org/val2017/000000039769.jpg", type='pil', label="Input Image")
+    # Combined mask controls
     bt_masks = gr.Button("Compute Masks")
     with gr.Row():
         mask_image = gr.Image(type='numpy', label="Diffusion Mask")
         masked_image = gr.Image(type='pil', label="Masked Image")
     mask_storage = gr.State()
+    # Mask editing controls
     with gr.Row():
         max_slider = gr.Slider(minimum=1, maximum=99, value=23, step=2, label="Mask Overflow")
         min_slider = gr.Slider(minimum=1, maximum=99, value=5, step=2, label="Mask Denoising")
         mask_checkboxes = gr.CheckboxGroup(interactive=True, label="Mask Selection")
+    # Diffusion controls and output
     with gr.Row():
         with gr.Column():
             prompt = gr.Textbox("Two ginger cats lying together on a pink sofa. There are two TV remotes. High definition.", label="Prompt")
     update_mask_inputs = [input_image, mask_storage, mask_checkboxes, max_slider, min_slider]
     update_mask_outputs = [mask_image, masked_image]
+    # Clear checkbox group on input image change
     input_image.change(lambda: gr.CheckboxGroup.update(choices=[], value=[]), outputs=mask_checkboxes)
+    # Segmentation button callback
     bt_masks.click(fn_segmentation, inputs=[input_image, max_slider, min_slider], outputs=[mask_storage, mask_checkboxes, mask_image, masked_image])
+    # Update mask callbacks
+    # TODO: can we replace this with `mask_image.change`? Not sure if it will actively update.
     max_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
     min_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
     mask_checkboxes.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
+    # Diffusion button callback
     bt_diffusion.click(fn_diffusion, inputs=[
         prompt,
         masked_image,