File size: 10,899 Bytes
95d4bb7 98a244a 95d4bb7 98a244a 95d4bb7 9b891da 98a244a 3e0e07e 0f4d2e1 95d4bb7 7d1a5da 95d4bb7 9b891da 95d4bb7 7d1a5da 95775e7 7d1a5da 95d4bb7 7d1a5da 0f4d2e1 7d1a5da 95d4bb7 7d1a5da 95d4bb7 9b891da 95d4bb7 9e58ce5 95d4bb7 4934d7e 9c8bfe6 ae90f91 7d1a5da 95d4bb7 4934d7e 9b891da f294907 95d4bb7 e53acb2 95d4bb7 e53acb2 95d4bb7 7d1a5da 95d4bb7 7d1a5da 0f4d2e1 95d4bb7 39cb3b9 95d4bb7 9b891da 95d4bb7 4934d7e 95d4bb7 7d1a5da 95d4bb7 9b891da 95d4bb7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 |
import os
import re
import time
from dataclasses import dataclass
from glob import iglob
from einops import rearrange
from PIL import ExifTags, Image
import torch
import gradio as gr
import numpy as np
from flux.sampling import prepare
from flux.util import (load_ae, load_clip, load_t5)
from models.kv_edit import Flux_kv_edit,Flux_kv_edit_inf
import spaces
from huggingface_hub import login
# Authenticate with the Hugging Face Hub at import time, using the value of the
# `Token` environment variable.
# NOTE(review): os.getenv returns None when `Token` is unset, so None is passed
# straight to login(); confirm that is acceptable for every deployment target.
login(token=os.getenv('Token'))
@dataclass
class SamplingOptions:
    """Per-request settings that `edit` collects and hands to the model as one object."""
    # Prompt passed to `prepare` to build the source-side conditioning.
    source_prompt: str = ''
    # Prompt passed to `prepare` to build the target-side conditioning.
    target_prompt: str = ''
    # Working resolution in pixels; `edit` overwrites both with the cropped
    # image size (rounded down to a multiple of 16).
    width: int = 1366
    height: int = 768
    # Step counts and skip count, forwarded from the UI sliders.
    # NOTE(review): how the model consumes these is defined in models.kv_edit,
    # which is not visible here.
    inversion_num_steps: int = 0
    denoise_num_steps: int = 0
    skip_step: int = 0
    # Guidance values for the inversion and denoise passes (UI sliders).
    inversion_guidance: float = 1.0
    denoise_guidance: float = 1.0
    # Seed applied via torch.manual_seed in `edit`.
    seed: int = 42
    # Feature toggles and attention scale forwarded unchanged to the model.
    re_init: bool = False
    attn_mask: bool = False
    attn_scale_value: float = 0.0
def resize_image(image_array, max_width=512, max_height=512):
    """Shrink an image array so it fits inside a bounding box, keeping aspect ratio.

    Images that already fit are returned untouched; images are never enlarged.

    Args:
        image_array: Image as a numpy array whose last axis is the channel
            axis. Four channels are read as RGBA, anything else as RGB.
            (Presumably uint8, height x width x channels - confirm with callers.)
        max_width: Maximum allowed width in pixels.
        max_height: Maximum allowed height in pixels.

    Returns:
        `image_array` itself (not a copy) when no scaling is needed, otherwise
        a new numpy array holding the resized image.
    """
    mode = 'RGBA' if image_array.shape[-1] == 4 else 'RGB'
    pil_image = Image.fromarray(image_array, mode=mode)
    original_width, original_height = pil_image.size

    # Use the smaller ratio so that both dimensions end up inside the box.
    scale_ratio = min(max_width / original_width, max_height / original_height)
    if scale_ratio >= 1:
        return image_array

    # int() truncates, so a very elongated image could end up with a 0-pixel
    # side, which Image.resize rejects; keep at least one pixel per side.
    new_width = max(1, int(original_width * scale_ratio))
    new_height = max(1, int(original_height * scale_ratio))
    return np.array(pil_image.resize((new_width, new_height)))
@torch.inference_mode()
def encode(init_image, torch_device):
    """Encode an image array with the module-level autoencoder `ae`.

    Args:
        init_image: Numpy image array indexed (height, width, channel); the
            channel axis is moved to the front before encoding. Values are
            mapped with `x / 127.5 - 1`, i.e. 0..255 becomes -1..1.
        torch_device: Device the pixel tensor is moved to before encoding.

    Returns:
        The output of `ae.encode` for a batch of one image, cast to bfloat16.
    """
    pixels = torch.from_numpy(init_image).permute(2, 0, 1).float() / 127.5 - 1
    pixels = pixels.unsqueeze(0).to(torch_device)
    # inference_mode already disables autograd, so no nested no_grad is needed.
    return ae.encode(pixels).to(torch.bfloat16)
# init all components
# Everything below runs once at import time; the loaded models are shared by
# every request handled by `edit`.
device = "cuda" if torch.cuda.is_available() else "cpu"
name = 'flux-dev'
ae = load_ae(name, device)
# The T5 max_length depends on the model variant (256 for flux-schnell, else 512).
t5 = load_t5(device, max_length=256 if name == "flux-schnell" else 512)
clip = load_clip(device)
model = Flux_kv_edit(device=device, name=name)
# NOTE(review): `offload`, `is_schnell`, `feature_path` and
# `add_sampling_metadata` are not read anywhere in the visible code.
offload = False
# Re-assigns the same value as above; `name` is later written into the EXIF
# "Model" tag of saved results.
name = "flux-dev"
is_schnell = False
feature_path = 'feature'
# Directory where `edit` writes result images and mask overlays.
output_dir = 'result'
add_sampling_metadata = True
def _next_output_index(directory):
    """Return the next unused N for ``img_N.jpg`` in *directory*, creating it if needed."""
    os.makedirs(directory, exist_ok=True)
    numbered = re.compile(r"img_([0-9]+)\.jpg$")
    hits = (numbered.search(path) for path in iglob(os.path.join(directory, "img_*.jpg")))
    return max((int(hit.group(1)) for hit in hits if hit), default=-1) + 1


@spaces.GPU(duration=120)
@torch.inference_mode()
def edit(brush_canvas,
         source_prompt, target_prompt,
         inversion_num_steps, denoise_num_steps,
         skip_step,
         inversion_guidance, denoise_guidance, seed,
         re_init, attn_mask, attn_scale_value
         ):
    """Run one edit on the uploaded image, restricted to the brushed mask.

    Args:
        brush_canvas: Value of the image editor; its "background" entry is the
            uploaded RGBA image and "layers"[0] is the RGBA brush layer.
        source_prompt: Prompt used to build the source conditioning.
        target_prompt: Prompt used to build the target conditioning; also
            stored in the saved image's EXIF description.
        inversion_num_steps, denoise_num_steps, skip_step, inversion_guidance,
        denoise_guidance, re_init, attn_mask, attn_scale_value: Forwarded to
            the model unchanged through `SamplingOptions`.
        seed: Integer (or numeric string) seed; -1 draws a random seed.

    Returns:
        The edited image as a PIL image. The image and a mask overlay PNG are
        also written to `output_dir`.

    Raises:
        gr.Error: If no image was uploaded or no brush layer is present.
    """
    # Fail with a readable message instead of a TypeError/IndexError when the
    # user clicks "edit" without an image or without a mask layer.
    if not brush_canvas or brush_canvas.get("background") is None:
        raise gr.Error("Please upload an image before editing.")
    if not brush_canvas.get("layers"):
        raise gr.Error("Please use the brush to cover the area you want to edit.")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Crop height and width down to multiples of 16.
    rgba_init_image = resize_image(brush_canvas["background"])
    height, width = rgba_init_image.shape[:2]
    height -= height % 16
    width -= width % 16
    rgba_init_image = rgba_init_image[:height, :width, :]
    init_image = rgba_init_image[:, :, :3]

    rgba_mask = resize_image(brush_canvas["layers"][0])[:height, :width, :]
    # Integer truncation keeps only pixels whose alpha is at the maximum
    # (255 for 8-bit alpha); partially transparent brush edges become 0.
    mask = (rgba_mask[:, :, 3] / 255).astype(int)

    # Halve the brush alpha on a copy so the caller's layer is not mutated,
    # then composite it over the input for the saved preview.
    overlay = rgba_mask.copy()
    overlay[:, :, 3] //= 2
    masked_image = Image.alpha_composite(
        Image.fromarray(rgba_init_image, 'RGBA'),
        Image.fromarray(overlay, 'RGBA'),
    )
    mask = torch.from_numpy(mask).unsqueeze(0).unsqueeze(0).to(torch.bfloat16).to(device)

    init_image = encode(init_image, device).to(device)

    seed = int(seed)
    if seed == -1:
        seed = torch.randint(0, 2**32, (1,)).item()
    opts = SamplingOptions(
        source_prompt=source_prompt,
        target_prompt=target_prompt,
        width=width,
        height=height,
        inversion_num_steps=inversion_num_steps,
        denoise_num_steps=denoise_num_steps,
        skip_step=skip_step,
        inversion_guidance=inversion_guidance,
        denoise_guidance=denoise_guidance,
        seed=seed,
        re_init=re_init,
        attn_mask=attn_mask,
        attn_scale_value=attn_scale_value,
    )

    torch.manual_seed(opts.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(opts.seed)

    t0 = time.perf_counter()

    # Autograd is already off because of the inference_mode decorator.
    inp = prepare(t5, clip, init_image, prompt=opts.source_prompt)
    inp_target = prepare(t5, clip, init_image, prompt=opts.target_prompt)
    x = model(inp, inp_target, mask, opts)

    # Decode on whichever device was selected above instead of assuming CUDA.
    with torch.autocast(device_type=device, dtype=torch.bfloat16):
        x = ae.decode(x)

    # Bring the result into PIL layout: clamp to [-1, 1], move to CPU, HWC.
    x = x.clamp(-1, 1)
    x = x.float().cpu()
    x = rearrange(x[0], "c h w -> h w c")

    if torch.cuda.is_available():
        torch.cuda.synchronize()

    idx = _next_output_index(output_dir)
    fn = os.path.join(output_dir, "img_{idx}.jpg").format(idx=idx)

    img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())
    exif_data = Image.Exif()
    exif_data[ExifTags.Base.Software] = "AI generated;txt2img;flux"
    exif_data[ExifTags.Base.Make] = "Black Forest Labs"
    exif_data[ExifTags.Base.Model] = name
    exif_data[ExifTags.Base.ImageDescription] = target_prompt
    img.save(fn, exif=exif_data, quality=95, subsampling=0)
    masked_image.save(fn.replace(".jpg", "_mask.png"), format='PNG')

    t1 = time.perf_counter()
    print(f"Done in {t1 - t0:.1f}s. Saving {fn}")
    print("End Edit")
    return img
def create_demo(model_name: str):
    """Build the Gradio Blocks UI and wire the "edit" button to `edit`.

    Args:
        model_name: Model variant name. When it equals "flux-schnell" the two
            guidance sliders are rendered non-interactive.

    Returns:
        The assembled `gr.Blocks` app (not yet launched).
    """
    is_schnell = model_name == "flux-schnell"

    title = r"""
<h1 align="center">🎨 KV-Edit: Training-Free Image Editing for Precise Background Preservation</h1>
"""

    # The size quoted in step 1 matches the resize_image defaults used by `edit`.
    description = r"""
<b>Official 🤗 Gradio demo</b> for <a href='https://github.com/Xilluill/KV-Edit' target='_blank'><b>KV-Edit: Training-Free Image Editing for Precise Background Preservation</b></a>.<br>
💫💫 <b>Here is editing steps:</b> (We highly recommend you run our code locally!😘 Only one inversion before multiple editing, very productive!) <br>
1️⃣ Upload your image that needs to be edited (Images larger than 512*512 will be scaled down to fit within 512*512) <br>
2️⃣ Fill in your source prompt and use the brush tool to cover the area you want to edit (❗️required). <br>
3️⃣ Fill in your target prompt, then adjust the hyperparameters. <br>
4️⃣ Click the "Edit" button to generate your edited image! <br>
🔔🔔 [<b>Important</b>] Less skip steps, "re_init" and "attn_mask" will enhance the editing performance, making the results aligned with your text but may lead to discontinuous images. <br>
If you fail because of these three, we recommend trying to increase "attn_scale" to increase attention between mask and background.<br>
"""
    article = r"""
If our work is helpful, please help to ⭐ the <a href='https://github.com/Xilluill/KV-Edit' target='_blank'>Github Repo</a>. Thanks!
"""

    with gr.Blocks() as demo:
        gr.HTML(title)
        gr.Markdown(description)

        with gr.Row():
            # Left column: prompts, step counts, canvas and the trigger button.
            with gr.Column():
                source_prompt = gr.Textbox(label="Source Prompt", value='')
                inversion_num_steps = gr.Slider(1, 50, 28, step=1, label="Number of inversion steps")
                target_prompt = gr.Textbox(label="Target Prompt", value='')
                denoise_num_steps = gr.Slider(1, 50, 28, step=1, label="Number of denoise steps")
                brush_canvas = gr.ImageEditor(label="Brush Canvas",
                                              # One-element tuple; without the trailing
                                              # comma this would be a plain string.
                                              sources=('upload',),
                                              brush=gr.Brush(colors=["#ff0000"], color_mode='fixed'),
                                              interactive=True,
                                              transforms=[],
                                              container=True,
                                              format='png')

                edit_btn = gr.Button("edit")

            # Right column: advanced options and the result.
            with gr.Column():
                with gr.Accordion("Advanced Options", open=True):
                    skip_step = gr.Slider(0, 30, 0, step=1, label="Number of skip steps")
                    inversion_guidance = gr.Slider(1.0, 10.0, 1.5, step=0.1, label="inversion Guidance", interactive=not is_schnell)
                    denoise_guidance = gr.Slider(1.0, 10.0, 5.5, step=0.1, label="denoise Guidance", interactive=not is_schnell)
                    attn_scale_value = gr.Slider(0.0, 5.0, 1, step=0.1, label="attn_scale")
                    seed = gr.Textbox('0', label="Seed (-1 for random)", visible=True)
                    with gr.Row():
                        re_init = gr.Checkbox(label="re_init", value=False)
                        attn_mask = gr.Checkbox(label="attn_mask", value=False)

                output_image = gr.Image(label="Generated Image")
                gr.Markdown(article)

        # Input order must match the positional parameters of `edit`.
        edit_btn.click(
            fn=edit,
            inputs=[brush_canvas,
                    source_prompt, target_prompt,
                    inversion_num_steps, denoise_num_steps,
                    skip_step,
                    inversion_guidance,
                    denoise_guidance, seed,
                    re_init, attn_mask, attn_scale_value
                    ],
            outputs=[output_image]
        )
    return demo
# Build the UI for the flux-dev configuration and start serving it.
demo = create_demo("flux-dev")
demo.launch()