File size: 6,563 Bytes
b795d51
892cf9d
b795d51
 
9c7b939
 
 
a68f3d0
b795d51
9c7b939
892cf9d
 
 
 
 
 
 
b795d51
9c7b939
 
b795d51
9c7b939
 
 
b795d51
 
 
9c7b939
 
b795d51
892cf9d
 
 
 
b795d51
892cf9d
 
 
 
9c7b939
892cf9d
 
9c7b939
892cf9d
 
 
9c7b939
892cf9d
 
 
 
9c7b939
892cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c7b939
892cf9d
 
 
 
 
 
 
 
 
 
9c7b939
892cf9d
 
9c7b939
892cf9d
 
 
 
b795d51
892cf9d
 
 
 
 
 
 
 
 
 
 
 
 
b795d51
892cf9d
 
 
 
 
 
 
 
 
 
 
 
e9ef3b4
 
892cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b795d51
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
from PIL import Image
import numpy as np
import cv2
from lang_sam import LangSAM
from color_matcher import ColorMatcher
from color_matcher.normalizer import Normalizer
import torch

# Load the LangSAM model once, at import time; extract_mask() below reads this
# module-level instance on every call instead of constructing its own.
model = LangSAM()  # Use the default model or specify custom checkpoint if necessary

def extract_mask(image_pil, text_prompt):
    """Segment the object described by ``text_prompt`` and return a binary mask.

    Runs the module-level LangSAM ``model`` on the image and converts the
    first returned mask into a ``uint8`` array whose values are 0 or 255.

    Args:
        image_pil: PIL image to segment.
        text_prompt: Text describing the object to look for.

    Returns:
        A ``numpy.uint8`` mask (0 = background, 255 = selected). When the model
        returns no masks at all, an all-zero mask with the image's
        (height, width) is returned so callers can treat "nothing found"
        uniformly instead of handling an ``IndexError``.
    """
    masks, boxes, phrases, logits = model.predict(image_pil, text_prompt)

    # An empty prediction cannot be indexed with [0]; report "nothing found"
    # as an empty (all-zero) mask instead of crashing.
    if masks is None or len(masks) == 0:
        width, height = image_pil.size
        return np.zeros((height, width), dtype=np.uint8)

    # Only the first mask is used, matching the original behavior.
    # assumes masks[0] is a 2-D (H, W) tensor — TODO confirm against lang_sam
    masks_np = masks[0].cpu().numpy()
    mask = (masks_np > 0).astype(np.uint8) * 255  # Binary mask
    return mask

def apply_color_matching(source_img_np, ref_img_np):
    """Recolor ``source_img_np`` so that its colors follow ``ref_img_np``.

    A fresh ``ColorMatcher`` performs the transfer with the ``'mkl'`` method,
    and the outcome is passed through ``Normalizer(...).uint8_norm()`` before
    being returned.

    Args:
        source_img_np: Image array whose colors should be changed.
        ref_img_np: Image array providing the target color distribution.

    Returns:
        The color-transferred image as produced by ``uint8_norm()``.
    """
    transferred = ColorMatcher().transfer(src=source_img_np, ref=ref_img_np, method='mkl')
    return Normalizer(transferred).uint8_norm()

def process_image(current_image_pil, prompt, replacement_image_pil, color_ref_image_pil, image_history):
    """Apply a prompt-guided replacement and/or color match to the current image.

    The object described by ``prompt`` is located with ``extract_mask``. If a
    replacement image is given, it is resized to the mask's bounding box and
    pasted into the selected pixels. If a color reference image is given, the
    selected pixels are then color-matched to it via ``apply_color_matching``.

    History convention: ``image_history`` is the list of image states with the
    *current* image as its last element (this is what ``undo`` and the upload
    handler in this file rely on). A successful edit therefore appends the
    *result*; a failed edit leaves the history untouched.

    Args:
        current_image_pil: PIL image being edited, or ``None``.
        prompt: Text prompt identifying the object to edit.
        replacement_image_pil: Optional PIL image to paste into the mask.
        color_ref_image_pil: Optional PIL image used as color reference.
        image_history: List of previous states (may be ``None`` or empty).
            The caller's list is not mutated; an updated copy is returned.

    Returns:
        A 4-tuple ``(current_image, status_message, image_history,
        display_image)`` where ``current_image`` and ``display_image`` are the
        same object.
    """
    # Work on a private copy so a failure part-way through never leaves the
    # caller's history half-updated.
    image_history = list(image_history) if image_history else []

    if current_image_pil is None:
        return None, "No current image to edit.", image_history, None

    mask = extract_mask(current_image_pil, prompt)

    # Nothing selected: report it and leave image and history as they were.
    if not mask.any():
        return current_image_pil, f"No mask detected for prompt: {prompt}", image_history, current_image_pil

    # Force three channels so the mask broadcasting below is valid even for
    # RGBA / grayscale inputs.
    current_image_np = np.array(current_image_pil.convert("RGB"))
    selected = mask > 0
    # assumes mask is 2-D (H, W) matching the image size — TODO confirm
    selected_3ch = selected[..., np.newaxis]
    result_image_np = current_image_np.copy()

    if replacement_image_pil is not None:
        # Bounding box of the selected pixels (non-empty: checked above).
        y_indices, x_indices = np.nonzero(selected)
        y_min, y_max = int(y_indices.min()), int(y_indices.max())
        x_min, x_max = int(x_indices.min()), int(x_indices.max())
        box_height = y_max - y_min + 1
        box_width = x_max - x_min + 1

        # Stretch the replacement to the bounding box, then copy it in only
        # where the mask is set so the object's silhouette is preserved.
        replacement_resized = replacement_image_pil.convert("RGB").resize((box_width, box_height))
        replacement_np = np.array(replacement_resized)

        roi = result_image_np[y_min:y_max + 1, x_min:x_max + 1]
        roi_selected = selected_3ch[y_min:y_max + 1, x_min:x_max + 1]
        result_image_np[y_min:y_max + 1, x_min:x_max + 1] = np.where(roi_selected, replacement_np, roi)

    if color_ref_image_pil is not None:
        # Keep only the selected pixels; everything else is zeroed.
        # NOTE(review): the zeroed pixels are still part of the array handed
        # to the color matcher — confirm this is the intended input.
        masked_region = np.where(selected_3ch, result_image_np, np.zeros_like(result_image_np))
        color_ref_image_np = np.array(color_ref_image_pil.convert("RGB"))
        color_matched_region = apply_color_matching(masked_region, color_ref_image_np)
        # Merge the recolored pixels back, leaving the rest untouched.
        result_image_np = np.where(selected_3ch, color_matched_region, result_image_np)

    result_image_pil = Image.fromarray(result_image_np)

    # Keep "last history entry == current image": seed with the pre-edit image
    # if the history is empty, then record the new state.
    if not image_history:
        image_history.append(current_image_pil.copy())
    image_history.append(result_image_pil)

    return result_image_pil, f"Applied changes for prompt: {prompt}", image_history, result_image_pil

def undo(image_history):
    """Step back one entry in the edit history.

    The history list is treated as a stack whose last element is the image
    currently shown. With more than one entry the last one is dropped (the
    list is modified in place) and the new last entry is restored; with a
    single entry that entry is returned unchanged.

    Args:
        image_history: List of image states, or an empty/``None`` value.

    Returns:
        A 3-tuple ``(current_image, image_history, display_image)``. When
        there is no history at all, ``(None, [], None)`` is returned.
    """
    # Nothing recorded: there is no state to go back to.
    if not image_history:
        return None, [], None

    # Never discard the only remaining entry.
    if len(image_history) > 1:
        image_history.pop()

    restored = image_history[-1]
    return restored, image_history, restored

def gradio_interface(share=True):
    """Build the image-editing Gradio UI and launch it.

    The layout has an input column (image upload, prompt, optional replacement
    and color reference images, Apply/Undo buttons) and an output column
    (edited image and status text). Two ``gr.State`` holders carry the edit
    history and the current image between callbacks.

    Args:
        share: Forwarded to ``demo.launch(share=...)``. Defaults to ``True``,
            which is what this function previously hard-coded; pass ``False``
            to launch without requesting a share link.
    """
    with gr.Blocks() as demo:
        # State holders passed into and out of the callbacks below.
        image_history = gr.State([])
        current_image_pil = gr.State(None)

        gr.Markdown("## Continuous Image Editing with LangSAM")

        with gr.Row():
            with gr.Column():
                initial_image = gr.Image(type="pil", label="Upload Image")
                prompt = gr.Textbox(lines=1, placeholder="Enter prompt for object detection", label="Prompt")
                replacement_image = gr.Image(type="pil", label="Replacement Image (optional)")
                color_ref_image = gr.Image(type="pil", label="Color Reference Image (optional)")
                apply_button = gr.Button("Apply Changes")
                undo_button = gr.Button("Undo")
            with gr.Column():
                current_image_display = gr.Image(type="pil", label="Edited Image", interactive=False)
                status = gr.Textbox(lines=2, interactive=False, label="Status")

        def initialize_image(initial_image_pil):
            """Seed current image, history and display from an uploaded image."""
            if initial_image_pil is None:
                return None, [], None
            # The history starts with the uploaded image as its only entry.
            return initial_image_pil, [initial_image_pil], initial_image_pil

        # When the initial image is uploaded, initialize the image history
        initial_image.upload(
            fn=initialize_image,
            inputs=initial_image,
            outputs=[current_image_pil, image_history, current_image_display],
        )

        # Apply button: run the edit and refresh state, status and display.
        apply_button.click(
            fn=process_image,
            inputs=[current_image_pil, prompt, replacement_image, color_ref_image, image_history],
            outputs=[current_image_pil, status, image_history, current_image_display],
        )

        # Undo button: restore the previous state from the history.
        undo_button.click(
            fn=undo,
            inputs=image_history,
            outputs=[current_image_pil, image_history, current_image_display],
        )

    demo.launch(share=share)
    
# Run the Gradio Interface only when this file is executed as a script;
# importing it as a module does not launch the UI.
if __name__ == "__main__":
    gradio_interface()