Spaces:

ahmetyaylalioglu
/

Inpaint-Diffusion-with-Prompts

Runtime error

App Files Files Community

ahmetyaylalioglu commited on Aug 1, 2024

Commit

c636056

verified ·

1 Parent(s): 8007933

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -14

app.py CHANGED Viewed

@@ -3,11 +3,11 @@ from PIL import Image
 import numpy as np
 from transformers import SamModel, SamProcessor
 from diffusers import AutoPipelineForInpainting
-from diffusers.models.autoencoders.vq_model import VQEncoderOutput, VQModel
 import torch
-# Force the model to use CPU
-device = "cpu"
 # Model and Processor setup
 model_name = "facebook/sam-vit-huge"
@@ -15,14 +15,14 @@ model = SamModel.from_pretrained(model_name).to(device)
 processor = SamProcessor.from_pretrained(model_name)
 def mask_to_rgb(mask):
     bg_transparent = np.zeros(mask.shape + (4,), dtype=np.uint8)
-    bg_transparent[mask == 1] = [0, 255, 0, 127]
     return bg_transparent
-def get_processed_inputs(image, points_str):
-    points = [list(map(int, point.split(','))) for point in points_str.split()]
-    input_points = [points]
-    inputs = processor(image, input_points=input_points, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model(**inputs)
     masks = processor.image_processor.post_process_masks(
@@ -34,12 +34,15 @@ def get_processed_inputs(image, points_str):
     return ~best_mask.cpu().numpy()
 def inpaint(raw_image, input_mask, prompt, negative_prompt=None, seed=74294536, cfgs=7):
     mask_image = Image.fromarray(input_mask)
     rand_gen = torch.manual_seed(seed)
     pipeline = AutoPipelineForInpainting.from_pretrained(
-        "diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16
-    )
-    pipeline.enable_model_cpu_offload()
     image = pipeline(
         prompt=prompt,
         image=raw_image,
@@ -50,7 +53,9 @@ def inpaint(raw_image, input_mask, prompt, negative_prompt=None, seed=74294536,
     ).images[0]
     return image
-def gradio_interface(image, points, positive_prompt, negative_prompt):
     raw_image = Image.fromarray(image).convert("RGB").resize((512, 512))
     mask = get_processed_inputs(raw_image, points)
     processed_image = inpaint(raw_image, mask, positive_prompt, negative_prompt)
@@ -60,7 +65,7 @@ iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Image(type="numpy", label="Input Image"),
-        gr.Textbox(label="Points (format: x1,y1 x2,y2 ...)", placeholder="e.g., 100,100 200,200"),
         gr.Textbox(label="Positive Prompt", placeholder="Enter positive prompt here"),
         gr.Textbox(label="Negative Prompt", placeholder="Enter negative prompt here")
     ],
@@ -69,7 +74,7 @@ iface = gr.Interface(
         gr.Image(label="Segmentation Mask")
     ],
     title="Interactive Image Inpainting",
-    description="Enter points as 'x1,y1 x2,y2 ...' for segmentation, provide prompts, and see the inpainted result."
 )
 iface.launch(share=True)

 import numpy as np
 from transformers import SamModel, SamProcessor
 from diffusers import AutoPipelineForInpainting
 import torch
+# Check if GPU is available, otherwise use CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
 # Model and Processor setup
 model_name = "facebook/sam-vit-huge"
 processor = SamProcessor.from_pretrained(model_name)
 def mask_to_rgb(mask):
+    """ Convert binary mask to RGB with transparency for the background. """
     bg_transparent = np.zeros(mask.shape + (4,), dtype=np.uint8)
+    bg_transparent[mask == 1] = [0, 255, 0, 127]  # Green mask with some transparency
     return bg_transparent
+def get_processed_inputs(image, points):
+    """ Process the input image and points using SAM model and processor. """
+    inputs = processor(image, input_points=points, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model(**inputs)
     masks = processor.image_processor.post_process_masks(
     return ~best_mask.cpu().numpy()
 def inpaint(raw_image, input_mask, prompt, negative_prompt=None, seed=74294536, cfgs=7):
+    """ Inpaint the masked area in the image using a text prompt and an inpainting pipeline. """
     mask_image = Image.fromarray(input_mask)
     rand_gen = torch.manual_seed(seed)
     pipeline = AutoPipelineForInpainting.from_pretrained(
+        "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32
+    ).to(device)
+    if device == "cpu":
+        pipeline.enable_model_cpu_offload()
     image = pipeline(
         prompt=prompt,
         image=raw_image,
     ).images[0]
     return image
+def gradio_interface(image, points_json, positive_prompt, negative_prompt):
+    """ Gradio interface function to handle image, points for segmentation, and prompts. """
+    points = [[(point['x'], point['y']) for point in stroke['points']] for stroke in points_json]
     raw_image = Image.fromarray(image).convert("RGB").resize((512, 512))
     mask = get_processed_inputs(raw_image, points)
     processed_image = inpaint(raw_image, mask, positive_prompt, negative_prompt)
     fn=gradio_interface,
     inputs=[
         gr.Image(type="numpy", label="Input Image"),
+        gr.Image(type="json", label="Click to select points", tool="sketch", brush_radius=1, shape=(512, 512)),
         gr.Textbox(label="Positive Prompt", placeholder="Enter positive prompt here"),
         gr.Textbox(label="Negative Prompt", placeholder="Enter negative prompt here")
     ],
         gr.Image(label="Segmentation Mask")
     ],
     title="Interactive Image Inpainting",
+    description="Click on the image to select points for segmentation, provide prompts, and see the inpainted result."
 )
 iface.launch(share=True)