# Gradio demo application for KV-Edit (training-free image editing with FLUX).
import os
import re
import time
from dataclasses import dataclass
from glob import iglob
from einops import rearrange
from PIL import ExifTags, Image
import torch
import gradio as gr
import numpy as np
from flux.sampling import prepare
from flux.util import (load_ae, load_clip, load_t5)
from models.kv_edit import Flux_kv_edit,Flux_kv_edit_inf
import spaces
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the token stored in the
# `Token` environment variable. Without a token, `login()` would fall back to
# an interactive prompt, which cannot be answered in a headless app, so the
# call is skipped instead.
_hf_token = os.getenv('Token')
if _hf_token:
    login(token=_hf_token)
else:
    print("Environment variable 'Token' is not set; skipping Hugging Face Hub login.")
@dataclass
class SamplingOptions:
    """Bundle of per-request settings handed to the editing model.

    Attributes:
        source_prompt: Prompt describing the input image.
        target_prompt: Prompt describing the desired result.
        width: Working width in pixels.
        height: Working height in pixels.
        inversion_num_steps: Number of inversion steps.
        denoise_num_steps: Number of denoise steps.
        skip_step: Number of inject steps (as labelled in the UI).
        inversion_guidance: Guidance value used during inversion.
        denoise_guidance: Guidance value used during denoising.
        seed: Seed used to initialise the random number generators.
        re_init: Flag forwarded to the model unchanged.
        attn_mask: Flag forwarded to the model unchanged.
    """

    source_prompt: str = ''
    target_prompt: str = ''
    width: int = 1366
    height: int = 768
    inversion_num_steps: int = 0
    denoise_num_steps: int = 0
    skip_step: int = 0
    inversion_guidance: float = 1.0
    denoise_guidance: float = 1.0
    seed: int = 42
    re_init: bool = False
    attn_mask: bool = False
@torch.inference_mode()
def encode(init_image, torch_device):
    """Encode an image array with the module-level autoencoder ``ae``.

    Args:
        init_image: NumPy array indexed as (row, column, channel). Values are
            rescaled with ``/ 127.5 - 1``, which maps 0..255 onto -1..1
            (assumes 8-bit pixel data; confirm against the caller).
        torch_device: Device the pixel tensor is moved to before encoding.

    Returns:
        The result of ``ae.encode`` cast to ``torch.bfloat16``.
    """
    # Channel-first layout with a leading batch axis of size one.
    pixels = torch.from_numpy(init_image).permute(2, 0, 1).float() / 127.5 - 1
    pixels = pixels.unsqueeze(0).to(torch_device)
    # The decorator already disables gradient tracking, so no extra
    # ``torch.no_grad()`` context is needed here.
    return ae.encode(pixels).to(torch.bfloat16)
# Initialise all model components once, when the module is loaded.
device = "cuda" if torch.cuda.is_available() else "cpu"
name = 'flux-dev'
# Single source of truth for the model variant; evaluates to False for 'flux-dev'.
is_schnell = name == "flux-schnell"
ae = load_ae(name, device)
t5 = load_t5(device, max_length=256 if is_schnell else 512)
clip = load_clip(device)
model = Flux_kv_edit(device=device, name=name)
# The three settings below are not read anywhere in the visible part of this
# file; they are kept in case other code relies on them.
offload = False
feature_path = 'feature'
add_sampling_metadata = True
# Directory that receives the edited images and their mask previews.
output_dir = 'result'
def _build_edit_mask(init_image, composite):
    """Return a 2-D integer mask (1 = edit, 0 = keep) from the brush canvas.

    If the three channels of ``composite`` are identical, the canvas is taken
    to be a ready-made black-and-white mask. Otherwise every pixel that
    differs from ``init_image`` is marked as part of the edit region.
    """
    red, green, blue = composite[:, :, 0], composite[:, :, 1], composite[:, :, 2]
    if np.all(red == green) and np.all(green == blue):
        # The integer cast truncates, so only pure-white (255) pixels become 1.
        mask = red / 255
    else:
        # 2-D boolean array: True wherever any channel was painted over.
        mask = np.any(init_image != composite, axis=-1)
    return mask.astype(int)


def _next_output_path(directory):
    """Return the path ``img_<idx>.jpg`` in ``directory`` with the next free index."""
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)
    template = os.path.join(directory, "img_{idx}.jpg")
    used = [
        int(path.split("_")[-1].split(".")[0])
        for path in iglob(template.format(idx="*"))
        if re.search(r"img_[0-9]+\.jpg$", path)
    ]
    idx = max(used) + 1 if used else 0
    return template.format(idx=idx)


@spaces.GPU(duration=120)
@torch.inference_mode()
def edit(init_image, brush_canvas,
         source_prompt, target_prompt,
         inversion_num_steps, denoise_num_steps,
         skip_step,
         inversion_guidance, denoise_guidance, seed,
         re_init, attn_mask
         ):
    """Run one edit request and return the edited image.

    Args:
        init_image: Input image as a NumPy array indexed (row, column, channel).
        brush_canvas: Image-editor value; its ``"composite"`` entry holds the
            image with the painted mask.
        source_prompt: Prompt describing the input image.
        target_prompt: Prompt describing the desired result.
        inversion_num_steps: Number of inversion steps.
        denoise_num_steps: Number of denoise steps.
        skip_step: Number of inject steps.
        inversion_guidance: Guidance value for inversion.
        denoise_guidance: Guidance value for denoising.
        seed: Integer seed (or its string form); ``-1`` picks a random seed.
        re_init: Flag forwarded to the model through ``SamplingOptions``.
        attn_mask: Flag forwarded to the model through ``SamplingOptions``.

    Returns:
        The edited image as a ``PIL.Image.Image``. The image and a preview of
        the mask are also written to ``output_dir``.

    Raises:
        gr.Error: If an input is missing, the seed is not an integer, the
            image is smaller than 16x16, or the canvas does not cover the
            input image.
    """
    # Validate user input up front so the UI shows a readable message instead
    # of an opaque NumPy/PIL traceback.
    if init_image is None:
        raise gr.Error("Please upload an input image first.")
    if not brush_canvas or brush_canvas.get("composite") is None:
        raise gr.Error("Please upload the image to the brush canvas and draw a mask.")
    try:
        seed = int(seed)
    except (TypeError, ValueError) as err:
        raise gr.Error("Seed must be an integer (-1 for random).") from err

    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        torch.cuda.empty_cache()

    # Crop both sides down to the nearest multiple of 16
    # (presumably a requirement of the model; confirm).
    rows, cols = init_image.shape[:2]
    height = rows - rows % 16
    width = cols - cols % 16
    if height == 0 or width == 0:
        raise gr.Error("The input image must be at least 16x16 pixels.")
    init_image = init_image[:height, :width, :]
    composite = brush_canvas["composite"][:, :, :3][:height, :width, :]
    if composite.shape[:2] != (height, width):
        raise gr.Error("The brush canvas must contain the same image as the input image.")

    mask = _build_edit_mask(init_image, composite)

    # Preview of the edit region: red overlay at 50% opacity (alpha 128) on
    # top of the fully opaque input image.
    overlay = np.zeros((height, width, 4), dtype=np.uint8)
    overlay[:, :, 0] = mask * 255  # R
    overlay[:, :, 3] = mask * 128  # A
    mask_image = Image.fromarray(overlay, 'RGBA')
    opaque_alpha = np.full((height, width, 1), 255, dtype=np.uint8)
    original_image = Image.fromarray(np.concatenate((init_image, opaque_alpha), axis=2), 'RGBA')
    masked_image = Image.alpha_composite(original_image, mask_image)

    mask = torch.from_numpy(mask).unsqueeze(0).unsqueeze(0).to(torch.bfloat16).to(device)
    init_image = encode(init_image, device).to(device)

    if seed == -1:
        seed = torch.randint(0, 2**32, (1,)).item()
    opts = SamplingOptions(
        source_prompt=source_prompt,
        target_prompt=target_prompt,
        width=width,
        height=height,
        inversion_num_steps=inversion_num_steps,
        denoise_num_steps=denoise_num_steps,
        skip_step=skip_step,
        inversion_guidance=inversion_guidance,
        denoise_guidance=denoise_guidance,
        seed=seed,
        re_init=re_init,
        attn_mask=attn_mask
    )
    torch.manual_seed(opts.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(opts.seed)

    t0 = time.perf_counter()

    # Inversion and editing. Gradients are already disabled by the
    # ``inference_mode`` decorator.
    inp = prepare(t5, clip, init_image, prompt=opts.source_prompt)
    inp_target = prepare(t5, clip, init_image, prompt=opts.target_prompt)
    x = model(inp, inp_target, mask, opts)

    # Decode on whichever device was selected above rather than assuming CUDA.
    with torch.autocast(device_type=device, dtype=torch.bfloat16):
        x = ae.decode(x)

    # Bring the decoded result into an (h, w, c) CPU tensor for PIL.
    x = x.clamp(-1, 1)
    x = x.float().cpu()
    x = rearrange(x[0], "c h w -> h w c")
    if torch.cuda.is_available():
        torch.cuda.synchronize()

    # Back in pixel space: pick the next free file name and save.
    fn = _next_output_path(output_dir)
    img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())
    exif_data = Image.Exif()
    exif_data[ExifTags.Base.Software] = "AI generated;txt2img;flux"
    exif_data[ExifTags.Base.Make] = "Black Forest Labs"
    exif_data[ExifTags.Base.Model] = name
    exif_data[ExifTags.Base.ImageDescription] = source_prompt
    img.save(fn, exif=exif_data, quality=95, subsampling=0)
    masked_image.save(fn.replace(".jpg", "_mask.png"), format='PNG')
    t1 = time.perf_counter()
    print(f"Done in {t1 - t0:.1f}s. Saving {fn}")

    print("End Edit")
    return img
def create_demo(model_name: str):
    """Build the Gradio Blocks interface for the editing demo.

    Args:
        model_name: Name of the model variant. When it equals
            ``"flux-schnell"`` the two guidance sliders are made
            non-interactive.

    Returns:
        The assembled ``gr.Blocks`` object, with the "edit" button wired to
        the module-level ``edit`` function. It is not launched here.
    """
    is_schnell = model_name == "flux-schnell"

    title = r"""
<h1 align="center">🎨 KV-Edit: Training-Free Image Editing for Precise Background Preservation</h1>
"""
    description = r"""
<b>Official 🤗 Gradio demo</b> for <a href='https://github.com/Xilluill/KV-Edit' target='_blank'><b>KV-Edit: Training-Free Image Editing for Precise Background Preservation</b></a>.<br>
🔔🔔[<b>Important</b>] Editing steps:<br>
1️⃣ Upload your image that needs to be edited (The resolution is expected be less than 1360*768, or the memory of GPU may be not enough.) <br>
2️⃣ Re-upload the original image and use the brush tool to draw your mask area. <br>
3️⃣ Fill in your source prompt and target prompt, then adjust the hyperparameters. <br>
4️⃣ Click the "Edit" button to generate your edited image! <br>
"""
    article = r"""
If our work is helpful, please help to ⭐ the <a href='https://github.com/Xilluill/KV-Edit' target='_blank'>Github Repo</a>. Thanks!
"""

    with gr.Blocks() as demo:
        gr.HTML(title)
        gr.Markdown(description)
        gr.Markdown(article)

        with gr.Row():
            # Left column: prompts, step counts, images and the trigger button.
            with gr.Column():
                source_prompt = gr.Textbox(label="Source Prompt", value='')
                inversion_num_steps = gr.Slider(1, 50, 28, step=1, label="Number of inversion steps")
                target_prompt = gr.Textbox(label="Target Prompt", value='')
                denoise_num_steps = gr.Slider(1, 50, 28, step=1, label="Number of denoise steps")
                init_image = gr.Image(label="Input Image", visible=True)
                brush_canvas = gr.ImageEditor(
                    label="Brush Canvas",
                    # One-element tuple: a bare ('upload') is just a string.
                    sources=("upload",),
                    brush=gr.Brush(default_size=10, default_color="#000000"),
                    interactive=True,
                    container=True,
                    transforms=[],
                    height="auto",
                    format='png',
                    scale=1,
                )
                edit_btn = gr.Button("edit")

            # Right column: advanced settings and the result.
            with gr.Column():
                with gr.Accordion("Advanced Options", open=True):
                    skip_step = gr.Slider(0, 30, 4, step=1, label="Number of inject steps")
                    inversion_guidance = gr.Slider(1.0, 10.0, 1.5, step=0.1, label="inversion Guidance", interactive=not is_schnell)
                    denoise_guidance = gr.Slider(1.0, 10.0, 5.5, step=0.1, label="denoise Guidance", interactive=not is_schnell)
                    seed = gr.Textbox('0', label="Seed (-1 for random)", visible=True)
                    with gr.Row():
                        re_init = gr.Checkbox(label="re_init", value=False)
                        attn_mask = gr.Checkbox(label="attn_mask", value=False)

                output_image = gr.Image(label="Generated Image")

        # The input order must match the parameter order of ``edit``.
        edit_btn.click(
            fn=edit,
            inputs=[
                init_image, brush_canvas,
                source_prompt, target_prompt,
                inversion_num_steps, denoise_num_steps,
                skip_step,
                inversion_guidance,
                denoise_guidance, seed,
                re_init, attn_mask,
            ],
            outputs=[output_image],
        )
    return demo
# Build the interface for the configured model variant and start serving it.
demo = create_demo(name)
demo.launch()