Spaces:

Sutirtha
/

LangSAM-color_matcher-Adv

Running

App Files Files Community

Sutirtha committed on Sep 25, 2024

Commit

d16663b

verified ·

1 Parent(s): 861b0a9

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -73

app.py CHANGED Viewed

@@ -10,58 +10,62 @@ import torch
 # Load the LangSAM model
 model = LangSAM()  # Use the default model or specify custom checkpoint if necessary
-def extract_mask(image_pil, text_prompt):
-    masks, boxes, phrases, logits = model.predict(image_pil, text_prompt)
-    masks_np = masks[0].cpu().numpy()
-    mask = (masks_np > 0).astype(np.uint8) * 255  # Binary mask
-    return mask
 def apply_color_matching(source_img_np, ref_img_np):
     # Initialize ColorMatcher
     cm = ColorMatcher()
     # Apply color matching
     img_res = cm.transfer(src=source_img_np, ref=ref_img_np, method='mkl')
     # Normalize the result
     img_res = Normalizer(img_res).uint8_norm()
     return img_res
-def process_image(current_image_pil, prompt, replacement_image_pil, color_ref_image_pil, apply_replacement, apply_color_grading, blending_amount, image_history):
     # Check if current_image_pil is None
     if current_image_pil is None:
         return None, "No current image to edit.", image_history, None
     if not apply_replacement and not apply_color_grading:
         return current_image_pil, "No changes applied. Please select at least one operation.", image_history, current_image_pil
     if apply_replacement and replacement_image_pil is None:
         return current_image_pil, "Replacement image not provided.", image_history, current_image_pil
     if apply_color_grading and color_ref_image_pil is None:
         return current_image_pil, "Color reference image not provided.", image_history, current_image_pil
     # Save current image to history for undo
     if image_history is None:
         image_history = []
     image_history.append(current_image_pil.copy())
-    # Extract mask
-    mask = extract_mask(current_image_pil, prompt)
-    # Check if mask is valid
-    if mask.sum() == 0:
-        return current_image_pil, f"No mask detected for prompt: {prompt}", image_history, current_image_pil
     # Proceed with replacement or color matching
     current_image_np = np.array(current_image_pil)
     result_image_np = current_image_np.copy()
     # Create mask with blending
     # First, normalize mask to range [0,1]
     mask_normalized = mask.astype(np.float32) / 255.0
     # Apply blending by blurring the mask
     if blending_amount > 0:
         # The kernel size for blurring; larger blending_amount means more blur
@@ -71,56 +75,44 @@ def process_image(current_image_pil, prompt, replacement_image_pil, color_ref_im
         mask_blurred = cv2.GaussianBlur(mask_normalized, (kernel_size, kernel_size), 0)
     else:
         mask_blurred = mask_normalized
     # Convert mask to 3 channels
     mask_blurred_3ch = cv2.merge([mask_blurred, mask_blurred, mask_blurred])
     # If apply replacement
     if apply_replacement:
-        # Resize replacement image to fit the mask area
-        # Get bounding box of the mask
-        y_indices, x_indices = np.where(mask > 0)
-        if y_indices.size == 0 or x_indices.size == 0:
-            # No mask detected
-            return current_image_pil, f"No mask detected for prompt: {prompt}", image_history, current_image_pil
-        y_min, y_max = y_indices.min(), y_indices.max()
-        x_min, x_max = x_indices.min(), x_indices.max()
-        # Extract the region of interest
-        mask_height = y_max - y_min + 1
-        mask_width = x_max - x_min + 1
-        # Resize replacement image to fit mask area
-        replacement_image_resized = replacement_image_pil.resize((mask_width, mask_height))
         replacement_image_np = np.array(replacement_image_resized)
-        # Create a mask for the ROI
-        mask_roi = mask_blurred[y_min:y_max+1, x_min:x_max+1]
-        mask_roi_3ch = cv2.merge([mask_roi, mask_roi, mask_roi])
-        # Replace the masked area with the replacement image using blending
-        region_to_replace = result_image_np[y_min:y_max+1, x_min:x_max+1]
-        blended_region = (replacement_image_np.astype(np.float32) * mask_roi_3ch + region_to_replace.astype(np.float32) * (1 - mask_roi_3ch)).astype(np.uint8)
-        result_image_np[y_min:y_max+1, x_min:x_max+1] = blended_region
     # If apply color grading
     if apply_color_grading:
-        # Extract the masked area
-        masked_region = (result_image_np.astype(np.float32) * mask_blurred_3ch).astype(np.uint8)
         # Convert color reference image to numpy
         color_ref_image_np = np.array(color_ref_image_pil)
-        # Apply color matching
-        color_matched_region = apply_color_matching(masked_region, color_ref_image_np)
-        # Blend the color matched region back into the result image
-        result_image_np = (color_matched_region.astype(np.float32) * mask_blurred_3ch + result_image_np.astype(np.float32) * (1 - mask_blurred_3ch)).astype(np.uint8)
     # Convert result back to PIL Image
     result_image_pil = Image.fromarray(result_image_np)
     # Update current_image_pil
     current_image_pil = result_image_pil
-    return current_image_pil, f"Applied changes for prompt: {prompt}", image_history, current_image_pil
 def undo(image_history):
     if image_history and len(image_history) > 1:
@@ -141,46 +133,62 @@ def gradio_interface():
         # Define the state variables
         image_history = gr.State([])
         current_image_pil = gr.State(None)
         gr.Markdown("## Continuous Image Editing with LangSAM")
         with gr.Row():
             with gr.Column():
                 initial_image = gr.Image(type="pil", label="Upload Image")
-                prompt = gr.Textbox(lines=1, placeholder="Enter prompt for object detection", label="Prompt")
                 replacement_image = gr.Image(type="pil", label="Replacement Image (optional)")
                 color_ref_image = gr.Image(type="pil", label="Color Reference Image (optional)")
                 apply_replacement = gr.Checkbox(label="Apply Replacement", value=False)
                 apply_color_grading = gr.Checkbox(label="Apply Color Grading", value=False)
                 blending_amount = gr.Slider(minimum=0, maximum=50, step=1, label="Blending Amount", value=0)
                 apply_button = gr.Button("Apply Changes")
                 undo_button = gr.Button("Undo")
             with gr.Column():
                 current_image_display = gr.Image(type="pil", label="Edited Image", interactive=False)
                 status = gr.Textbox(lines=2, interactive=False, label="Status")
         def initialize_image(initial_image_pil):
             # Initialize image history with the initial image
             if initial_image_pil is not None:
                 image_history = [initial_image_pil]
                 current_image_pil = initial_image_pil
-                return current_image_pil, image_history, initial_image_pil
             else:
-                return None, [], None
         # When the initial image is uploaded, initialize the image history
-        initial_image.upload(fn=initialize_image, inputs=initial_image, outputs=[current_image_pil, image_history, current_image_display])
         # Apply button click
-        apply_button.click(fn=process_image,
-                           inputs=[current_image_pil, prompt, replacement_image, color_ref_image, apply_replacement, apply_color_grading, blending_amount, image_history],
                            outputs=[current_image_pil, status, image_history, current_image_display])
         # Undo button click
         undo_button.click(fn=undo, inputs=image_history, outputs=[current_image_pil, image_history, current_image_display])
     demo.launch(share=True)
 # Run the Gradio Interface
 if __name__ == "__main__":
     gradio_interface()

 # Load the LangSAM model
 model = LangSAM()  # Use the default model or specify custom checkpoint if necessary
+def extract_masks(image_pil, prompts):
+    """Segment ``image_pil`` once per comma-separated prompt.
+
+    Args:
+        image_pil: Image passed unchanged to ``model.predict``.
+        prompts: Comma-separated prompt string, e.g. ``"sky, grass"``.
+            Whitespace around each entry is stripped and empty entries
+            are dropped. ``None`` is treated as an empty string.
+
+    Returns:
+        Dict mapping each prompt that produced at least one mask to a
+        ``uint8`` array holding 0 or 255, built from the first mask
+        returned for that prompt. Prompts with no detection are omitted.
+    """
+    masks_dict = {}
+    prompts_list = [p.strip() for p in (prompts or "").split(',') if p.strip()]
+    for prompt in prompts_list:
+        # Only the masks are needed; boxes, phrases and logits are discarded.
+        masks, *_ = model.predict(image_pil, prompt)
+        # Truth-testing a multi-element tensor raises, so detect
+        # "no detections" by length instead of `if masks:`.
+        if masks is None or len(masks) == 0:
+            continue
+        masks_np = masks[0].cpu().numpy()
+        masks_dict[prompt] = (masks_np > 0).astype(np.uint8) * 255  # Binary mask
+    return masks_dict
 def apply_color_matching(source_img_np, ref_img_np):
     """Transfer the colors of ``ref_img_np`` onto ``source_img_np``.
 
     Runs ``ColorMatcher.transfer`` with ``method='mkl'`` and returns the
     result after ``Normalizer(...).uint8_norm()``.
     """
     transferred = ColorMatcher().transfer(src=source_img_np, ref=ref_img_np, method='mkl')
     return Normalizer(transferred).uint8_norm()
+def process_image(current_image_pil, selected_prompt, masks_dict, replacement_image_pil, color_ref_image_pil, apply_replacement, apply_color_grading, apply_color_to_full_image, blending_amount, image_history):
     # Check if current_image_pil is None
     if current_image_pil is None:
         return None, "No current image to edit.", image_history, None
     if not apply_replacement and not apply_color_grading:
         return current_image_pil, "No changes applied. Please select at least one operation.", image_history, current_image_pil
     if apply_replacement and replacement_image_pil is None:
         return current_image_pil, "Replacement image not provided.", image_history, current_image_pil
     if apply_color_grading and color_ref_image_pil is None:
         return current_image_pil, "Color reference image not provided.", image_history, current_image_pil
+    # Get the mask from masks_dict
+    if selected_prompt not in masks_dict:
+        return current_image_pil, f"No mask available for selected segment: {selected_prompt}", image_history, current_image_pil
+    mask = masks_dict[selected_prompt]
     # Save current image to history for undo
     if image_history is None:
         image_history = []
     image_history.append(current_image_pil.copy())
     # Proceed with replacement or color matching
     current_image_np = np.array(current_image_pil)
     result_image_np = current_image_np.copy()
     # Create mask with blending
     # First, normalize mask to range [0,1]
     mask_normalized = mask.astype(np.float32) / 255.0
     # Apply blending by blurring the mask
     if blending_amount > 0:
         # The kernel size for blurring; larger blending_amount means more blur
         mask_blurred = cv2.GaussianBlur(mask_normalized, (kernel_size, kernel_size), 0)
     else:
         mask_blurred = mask_normalized
     # Convert mask to 3 channels
     mask_blurred_3ch = cv2.merge([mask_blurred, mask_blurred, mask_blurred])
     # If apply replacement
     if apply_replacement:
+        # Resize replacement image to match current image
+        replacement_image_resized = replacement_image_pil.resize(current_image_pil.size)
         replacement_image_np = np.array(replacement_image_resized)
+        # Blend the replacement image with the current image using the mask
+        result_image_np = (replacement_image_np.astype(np.float32) * mask_blurred_3ch + result_image_np.astype(np.float32) * (1 - mask_blurred_3ch)).astype(np.uint8)
     # If apply color grading
     if apply_color_grading:
         # Convert color reference image to numpy
         color_ref_image_np = np.array(color_ref_image_pil)
+        if apply_color_to_full_image:
+            # Apply color matching to the full image
+            color_matched_image = apply_color_matching(result_image_np, color_ref_image_np)
+            result_image_np = color_matched_image
+        else:
+            # Apply color matching only to the masked area
+            # Extract the masked area
+            masked_region = (result_image_np.astype(np.float32) * mask_blurred_3ch).astype(np.uint8)
+            # Apply color matching
+            color_matched_region = apply_color_matching(masked_region, color_ref_image_np)
+            # Blend the color matched region back into the result image
+            result_image_np = (color_matched_region.astype(np.float32) * mask_blurred_3ch + result_image_np.astype(np.float32) * (1 - mask_blurred_3ch)).astype(np.uint8)
     # Convert result back to PIL Image
     result_image_pil = Image.fromarray(result_image_np)
     # Update current_image_pil
     current_image_pil = result_image_pil
+    return current_image_pil, f"Applied changes to '{selected_prompt}'", image_history, current_image_pil
 def undo(image_history):
     if image_history and len(image_history) > 1:
         # Define the state variables
         image_history = gr.State([])
         current_image_pil = gr.State(None)
+        masks_dict = gr.State({})  # Store masks for each prompt
         gr.Markdown("## Continuous Image Editing with LangSAM")
         with gr.Row():
             with gr.Column():
                 initial_image = gr.Image(type="pil", label="Upload Image")
+                prompts = gr.Textbox(lines=1, placeholder="Enter prompts separated by commas (e.g., sky, grass)", label="Prompts")
+                segment_button = gr.Button("Segment Image")
+                segment_dropdown = gr.Dropdown(label="Select Segment", choices=[])
                 replacement_image = gr.Image(type="pil", label="Replacement Image (optional)")
                 color_ref_image = gr.Image(type="pil", label="Color Reference Image (optional)")
                 apply_replacement = gr.Checkbox(label="Apply Replacement", value=False)
                 apply_color_grading = gr.Checkbox(label="Apply Color Grading", value=False)
+                apply_color_to_full_image = gr.Checkbox(label="Apply Color Correction to Full Image", value=False)
                 blending_amount = gr.Slider(minimum=0, maximum=50, step=1, label="Blending Amount", value=0)
                 apply_button = gr.Button("Apply Changes")
                 undo_button = gr.Button("Undo")
             with gr.Column():
                 current_image_display = gr.Image(type="pil", label="Edited Image", interactive=False)
                 status = gr.Textbox(lines=2, interactive=False, label="Status")
         def initialize_image(initial_image_pil):
+            """Reset all editing state when a new image is uploaded.
+
+            Returns values for, in order: the current-image state, the image
+            history, the displayed image, the masks dict, the segment
+            dropdown, and the status text.
+            """
+            # Masks are reset to {} here, so the dropdown's choices must be
+            # cleared too. A bare [] would only set the dropdown's value and
+            # leave segments from the previous image selectable.
+            cleared_dropdown = gr.update(choices=[], value=None)
+            if initial_image_pil is None:
+                return None, [], None, {}, cleared_dropdown, "No image loaded."
+            # Initialize image history with the initial image
+            return initial_image_pil, [initial_image_pil], initial_image_pil, {}, cleared_dropdown, "Image loaded."
         # When the initial image is uploaded, initialize the image history
+        initial_image.upload(fn=initialize_image, inputs=initial_image, outputs=[current_image_pil, image_history, current_image_display, masks_dict, segment_dropdown, status])
+        # Segment button click
+        def segment_image_wrapper(current_image_pil, prompts):
+            """Segment the current image for the typed prompts.
+
+            Returns values for, in order: the status text, the masks dict
+            state, and the segment dropdown.
+            """
+            # Clear the choices, not just the value, so segments from an
+            # earlier run are not left selectable after a failed one.
+            no_segments = gr.update(choices=[], value=None)
+            if current_image_pil is None:
+                return "No image uploaded.", {}, no_segments
+            masks = extract_masks(current_image_pil, prompts)
+            if not masks:
+                return "No masks detected for the given prompts.", {}, no_segments
+            dropdown_choices = list(masks)
+            # gr.update works on Gradio 3.x and 4.x; the per-component
+            # Dropdown.update classmethod was removed in 4.0.
+            return "Segmentation completed.", masks, gr.update(choices=dropdown_choices, value=dropdown_choices[0])
+        segment_button.click(fn=segment_image_wrapper, inputs=[current_image_pil, prompts], outputs=[status, masks_dict, segment_dropdown])
         # Apply button click
+        apply_button.click(fn=process_image,
+                           inputs=[current_image_pil, segment_dropdown, masks_dict, replacement_image, color_ref_image, apply_replacement, apply_color_grading, apply_color_to_full_image, blending_amount, image_history],
                            outputs=[current_image_pil, status, image_history, current_image_display])
         # Undo button click
         undo_button.click(fn=undo, inputs=image_history, outputs=[current_image_pil, image_history, current_image_display])
     demo.launch(share=True)
 # Run the Gradio Interface
 if __name__ == "__main__":
     gradio_interface()