import os

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import AutoencoderKL, LCMScheduler
from PIL import Image
from torchvision import transforms
# Local modules shipped alongside this app (custom ControlNet and pipeline).
from controlnet import ControlNetModel, ControlNetConditioningEmbedding
from pipeline_controlnet_sd_xl import StableDiffusionXLControlNetPipeline

hf_token = os.environ.get("HF_TOKEN")

ratios_map = {
    0.5:  {"width": 704,  "height": 1408},
    0.57: {"width": 768,  "height": 1344},
    0.68: {"width": 832,  "height": 1216},
    0.72: {"width": 832,  "height": 1152},
    0.78: {"width": 896,  "height": 1152},
    0.82: {"width": 896,  "height": 1088},
    0.88: {"width": 960,  "height": 1088},
    0.94: {"width": 960,  "height": 1024},
    1.00: {"width": 1024, "height": 1024},
    1.13: {"width": 1088, "height": 960},
    1.21: {"width": 1088, "height": 896},
    1.29: {"width": 1152, "height": 896},
    1.38: {"width": 1152, "height": 832},
    1.46: {"width": 1216, "height": 832},
    1.67: {"width": 1280, "height": 768},
    1.75: {"width": 1344, "height": 768},
    2.00: {"width": 1408, "height": 704},
}
ratios = np.array(list(ratios_map.keys()))
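# get_size() below snaps an input image to the nearest of these buckets,
# so both output dimensions stay multiples of 64.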

image_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)
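# ToTensor maps PIL pixels in [0, 255] to floats in [0, 1]; predict() later
# rescales these to [-1, 1] before VAE encoding.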

default_negative_prompt = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers"

def get_masked_image(image, image_mask, width, height):
    """Grey out the masked region of `image`; return the masked image, the
    resized PIL mask, and a display copy of the masked image."""
    image_mask = image_mask.resize((width, height))
    image_mask_pil = image_mask
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask_pil.convert("L")).astype(np.float32) / 255.0
    assert image.shape[:2] == image_mask.shape[:2], "image and image_mask must have the same image size"
    masked_image_to_present = image.copy()
    masked_image_to_present[image_mask > 0.5] = (0.5, 0.5, 0.5)  # paint masked pixels mid-grey
    image[image_mask > 0.5] = 0.5  # grey out the masked region in the conditioning image
    image = Image.fromarray((image * 255.0).astype(np.uint8))
    masked_image_to_present = Image.fromarray((masked_image_to_present * 255.0).astype(np.uint8))
    return image, image_mask_pil, masked_image_to_present

def get_size(init_image):
    w, h = init_image.size
    curr_ratio = w / h
    ind = np.argmin(np.abs(curr_ratio - ratios))
    ratio = ratios[ind]
    chosen_ratio = ratios_map[ratio]
    w, h = chosen_ratio["width"], chosen_ratio["height"]
    return w, h
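
# Example: a 600x900 upload (ratio ~0.67) snaps to the 0.68 bucket -> (832, 1216).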

device = "cuda" if torch.cuda.is_available() else "cpu"

controlnet = ControlNetModel.from_pretrained("briaai/DEV-GenerativeFill", torch_dtype=torch.float16)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "briaai/BRIA-2.3",
    controlnet=controlnet.to(dtype=torch.float16),
    torch_dtype=torch.float16,
    vae=vae,
)

# LCM scheduler plus the fused fast LoRA allow good results in roughly 12 steps.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("briaai/BRIA-2.3-FAST-LORA")
pipe.fuse_lora()

pipe = pipe.to(device)

pipe.enable_model_cpu_offload()
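# Note: model CPU offload manages device placement on its own, superseding the
# .to(device) call above; each submodule is moved to the GPU only while it runs.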

def read_content(file_path: str) -> str:
    """Read the content of the target file."""
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    return content

@spaces.GPU(enable_queue=True)
def predict(editor_dict, prompt="", negative_prompt=default_negative_prompt, guidance_scale=1.2, steps=12, seed=123456):
    if negative_prompt == "":
        negative_prompt = None

    # The ImageEditor value is a dict: "background" is the uploaded image and
    # "layers" holds the drawn strokes; the stroke alpha channel is the inpainting mask.
    init_image = Image.fromarray(editor_dict["background"][:, :, :3], "RGB")
    mask = Image.fromarray(editor_dict["layers"][0][:, :, 3], "L")

    # Snap to the nearest supported resolution bucket, then resize image and mask.
    width, height = get_size(init_image)
    init_image = init_image.resize((width, height))
    mask = mask.resize((width, height))

    masked_image, image_mask, masked_image_to_present = get_masked_image(init_image, mask, width, height)
    masked_image_tensor = image_transforms(masked_image)
    masked_image_tensor = (masked_image_tensor - 0.5) / 0.5  # rescale [0, 1] -> [-1, 1]
    masked_image_tensor = masked_image_tensor.unsqueeze(0).to(device=device)
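
    # Encode the grey-masked image into VAE latent space; these latents are the
    # spatial conditioning signal for the ControlNet.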
    control_latents = vae.encode(
        masked_image_tensor[:, :3, :, :].to(vae.dtype)
    ).latent_dist.sample()
    control_latents = control_latents * vae.config.scaling_factor
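    # scaling_factor (0.13025 for the SDXL VAE) puts the latents on the scale
    # the UNet was trained with.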

    image_mask = np.array(image_mask)
    mask_tensor = torch.tensor(image_mask, dtype=torch.float32)[None, ...]

    # Binarize the 0-255 mask, then scale it to {0, 1}.
    mask_tensor = torch.where(mask_tensor > 128.0, 255.0, 0.0)
    mask_tensor = mask_tensor / 255.0
    mask_tensor = mask_tensor.to(device=device)

    # Downsample the mask to the latent resolution and concatenate it with the
    # 4 latent channels to form the 5-channel ControlNet conditioning input.
    mask_resized = torch.nn.functional.interpolate(mask_tensor[None, ...], size=(control_latents.shape[2], control_latents.shape[3]), mode="nearest")
    masked_image = torch.cat([control_latents, mask_resized], dim=1)

    generator = torch.Generator(device=device).manual_seed(int(seed))
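
    # With the fused LCM LoRA the pipeline runs at low guidance (0.8-2.5 in the UI)
    # for roughly 6-20 inference steps.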
    output = pipe(prompt=prompt,
                  width=width,
                  height=height,
                  negative_prompt=negative_prompt,
                  image=masked_image,
                  init_image=init_image,
                  mask_image=mask_tensor,
                  guidance_scale=guidance_scale,
                  num_inference_steps=int(steps),
                  generator=generator,
                  controlnet_conditioning_scale=1.0)

    torch.cuda.empty_cache()
    return output.images[0]
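
# predict() returns a single PIL image, which Gradio renders in the output
# component wired up below.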

css = '''
.gradio-container{max-width: 1100px !important}
#image_upload{min-height:400px}
#image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 400px}
#mask_radio .gr-form{background:transparent; border: none}
#word_mask{margin-top: .75em !important}
#word_mask textarea:disabled{opacity: 0.3}
.footer {margin-bottom: 45px;margin-top: 35px;text-align: center;border-bottom: 1px solid #e5e5e5}
.footer>p {font-size: .8rem; display: inline-block; padding: 0 10px;transform: translateY(10px);background: white}
.dark .footer {border-color: #303030}
.dark .footer>p {background: #0b0f19}
.acknowledgments h4{margin: 1.25em 0 .25em 0;font-weight: bold;font-size: 115%}
#image_upload .touch-none{display: flex}
@keyframes spin {
    from {
        transform: rotate(0deg);
    }
    to {
        transform: rotate(360deg);
    }
}
#share-btn-container {padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; max-width: 13rem; margin-left: auto;}
div#share-btn-container > div {flex-direction: row;background: black;align-items: center}
#share-btn-container:hover {background-color: #060606}
#share-btn {all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.5rem !important; padding-bottom: 0.5rem !important;right:0;}
#share-btn * {all: unset}
#share-btn-container div:nth-child(-n+2){width: auto !important;min-height: 0px !important;}
#share-btn-container .wrap {display: none !important}
#share-btn-container.hidden {display: none!important}
#prompt input{width: calc(100% - 160px);border-top-right-radius: 0px;border-bottom-right-radius: 0px;}
#run_button{position:absolute;margin-top: 11px;right: 0;margin-right: 0.8em;border-bottom-left-radius: 0px;border-top-left-radius: 0px;}
#prompt-container{margin-top:-18px;}
#prompt-container .form{border-top-left-radius: 0;border-top-right-radius: 0}
#image_upload{border-bottom-left-radius: 0px;border-bottom-right-radius: 0px}
'''

image_blocks = gr.Blocks(css=css, elem_id="total-container")
with image_blocks as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## BRIA GenerativeFill")
        gr.HTML('''
            <p style="margin-bottom: 10px; font-size: 94%">
                This is a demo for
                <a href="https://huggingface.co/briaai/DEV-GenerativeFill" target="_blank">BRIA Generative Fill</a>.
                BRIA Generative Fill improves on BRIA 2.3 inpainting for Modify/Add/Replace tasks while still being
                trained exclusively on licensed data, so it provides full legal liability coverage for copyright and
                privacy infringement.
            </p>
        ''')
        with gr.Row():
            with gr.Column():
                # ImageEditor returns the {"background", "layers"} dict that predict() expects.
                image = gr.ImageEditor(sources=["upload"], elem_id="image_upload", type="numpy", label="Upload", height=400)
                with gr.Row(elem_id="prompt-container", equal_height=True):
                    with gr.Row():
                        prompt = gr.Textbox(placeholder="Your prompt (what you want in place of what is erased)", show_label=False, elem_id="prompt")
                        btn = gr.Button("Inpaint!", elem_id="run_button")

                with gr.Accordion(label="Advanced Settings", open=False):
                    with gr.Row(equal_height=True):
                        guidance_scale = gr.Number(value=1.2, minimum=0.8, maximum=2.5, step=0.1, label="guidance_scale")
                        steps = gr.Number(value=12, minimum=6, maximum=20, step=1, label="steps")
                        seed = gr.Number(value=123456, minimum=0, maximum=999999, step=1, label="seed")
                    negative_prompt = gr.Textbox(label="negative_prompt", value=default_negative_prompt, placeholder=default_negative_prompt, info="what you don't want to see in the image")

            with gr.Column():
                image_out = gr.Image(label="Output", elem_id="output-img", height=400)

    btn.click(fn=predict, inputs=[image, prompt, negative_prompt, guidance_scale, steps, seed], outputs=[image_out], api_name="run")
    prompt.submit(fn=predict, inputs=[image, prompt, negative_prompt, guidance_scale, steps, seed], outputs=[image_out])

    gr.HTML(
        """
            <div class="footer">
                <p>Model by <a href="https://huggingface.co/briaai" style="text-decoration: underline;" target="_blank">BRIA AI</a> - Gradio demo by 🤗 Hugging Face
                </p>
            </div>
        """
    )

image_blocks.queue(max_size=25, api_open=False).launch(show_api=False)