import os
import random
import time
from contextlib import suppress

import gradio as gr
import PIL
import requests
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
from PIL import Image
# Hugging Face Hub id of the InstructPix2Pix checkpoint used for every edit.
model_id = "timbrooks/instruct-pix2pix"

# Load the weights from the "fp16" revision as float16 tensors, then move the
# whole pipeline to the CUDA device.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    revision="fp16",
)
pipe.to("cuda")

# Switch attention to the xformers implementation and store the UNet in
# channels-last memory format.
pipe.enable_xformers_memory_efficient_attention()
pipe.unet.to(memory_format=torch.channels_last)
# Markdown shown at the bottom of the page. Every line of one list item is kept
# contiguous (no blank line in between) so Markdown renders it as one paragraph.
help_text = """ Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

If you're not getting what you want, there may be a few reasons:

1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages the model to generate images
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should
be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.

2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the initial image. Image guidance
scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages the model to generate images that are closely
linked to the source image `image`, usually at the expense of lower image quality.

3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").

4. Increasing the number of steps sometimes improves results.

5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
    * Cropping the image so the face takes up a larger portion of the frame.
"""
def previous(image):
    """Return ``image`` unchanged.

    Used as a Gradio event handler that copies the value of one image
    component into another (the "one edit back" slot), so it deliberately
    performs no processing.
    """
    return image
def upload_image(file):
    """Open ``file`` with Pillow and return the decoded image.

    Args:
        file: Anything ``PIL.Image.open`` accepts (a path or a binary
            file-like object).

    Returns:
        The opened image with its pixel data already loaded.
    """
    # Image.open is lazy; decode inside the context manager so the file
    # handle Pillow opened is released instead of lingering until GC.
    with Image.open(file) as img:
        img.load()
    return img
# Width (in pixels) every working image is scaled to before it is stored or edited.
_TARGET_WIDTH = 512

# One of these is prepended to the image in the reply after a successful edit.
_EDIT_REPLIES = (
    "There you go",
    "Enjoy your image!",
    "Nice work! Wonder what you gonna do next!",
    "Way to go!",
    "Does this work for you?",
    "Something like this?",
)

_NO_IMAGE_REPLY = "Please upload an image first."


def _fit_width(image, width=_TARGET_WIDTH):
    """Return a copy of ``image`` scaled to ``width`` pixels wide, keeping the aspect ratio."""
    scale = width / float(image.size[0])
    # Clamp so an extremely wide image cannot collapse to a zero-pixel height.
    height = max(1, int(float(image.size[1]) * scale))
    return image.resize((width, height), Image.Resampling.LANCZOS)


def _timestamped_name(img_name):
    """Return ``<base>-<timestamp>.png`` where ``<base>`` is ``img_name`` minus any earlier stamp."""
    stem = os.path.splitext(img_name)[0] or "./edited_image"
    base, sep, stamp = stem.rpartition("-")
    # Drop the stamp added by a previous call; otherwise the name would grow
    # with every edit until it exceeds the filesystem's file-name limit.
    if sep and stamp.isdigit():
        stem = base
    return f"{stem}-{time.time_ns()}.png"


def _img_tag(path):
    """Return the HTML snippet that shows the file at ``path`` inside the chat."""
    return '<img src="/file=' + path + '">'


def _reply(history, prompt, response, image, img_name, counter_out):
    """Append ``(prompt, response)`` to ``history`` and build ``chat``'s 5-tuple result."""
    history.append((prompt, response))
    return history, history, image, img_name, counter_out


def chat(btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name, counter_out, image_oneup, prompt, history, progress=gr.Progress(track_tqdm=True)):
    """Handle one chat turn: store an upload, run an edit, or undo/restart.

    Args:
        btn_upload: Uploaded file as delivered by the upload button; opened
            with ``PIL.Image.open`` on the first turn.
        image_in: The stored original image (PIL), or ``None`` before upload.
        in_steps: Number of inference steps (converted with ``int``).
        in_guidance_scale: Text guidance scale (converted with ``float``).
        in_img_guidance_scale: Image guidance scale (converted with ``float``).
        image_hid: Result of the most recent edit (PIL) or ``None``.
        img_name: Path of the image file written by the previous turn.
        counter_out: ``0`` before the first upload, ``1`` right after it,
            and greater than ``1`` once at least one edit has been made.
        image_oneup: The image from one turn earlier (PIL) or ``None``.
        prompt: The user's instruction; ``reverse`` and ``restart``
            (case-insensitive) are treated as commands.
        history: List of ``(prompt, response)`` tuples, or ``None``.
        progress: Gradio progress tracker.

    Returns:
        ``(history, history, image, img_name, counter_out)`` where ``image``
        is the image that is current after this turn and ``img_name`` is the
        path of the file it was saved to.
    """
    progress(0, desc="Starting...")
    history = history or []
    img_name = img_name or ""
    command = (prompt or "").strip().lower()

    if command == "reverse":
        if image_oneup is None:
            # Nothing to go back to: answer in the chat instead of crashing on None.save().
            return _reply(history, prompt, "There is no previous image to go back to yet.", image_hid, img_name, counter_out)
        restored_name = _timestamped_name(img_name)
        image_oneup.save(restored_name)
        return _reply(history, prompt, "Reverted to the last image " + _img_tag(restored_name), image_oneup, restored_name, counter_out)

    if command == "restart":
        if image_in is None:
            return _reply(history, prompt, _NO_IMAGE_REPLY, image_hid, img_name, counter_out)
        original = _fit_width(image_in)
        restored_name = _timestamped_name(img_name)
        original.save(restored_name)
        return _reply(history, prompt, "Restarted from the original image " + _img_tag(restored_name), original, restored_name, counter_out)

    if counter_out == 0:
        # First turn: store the uploaded picture as the starting point.
        if btn_upload is None:
            return _reply(history, prompt, _NO_IMAGE_REPLY, image_hid, img_name, counter_out)
        seed = random.randint(0, 1000000)
        img_name = f"./edited_image_{seed}.png"
        with Image.open(btn_upload) as uploaded:
            fitted = _fit_width(uploaded)
        fitted.save(img_name)
        return _reply(history, prompt, _img_tag(img_name), fitted, img_name, counter_out + 1)

    # The first edit starts from the original upload, later edits from the
    # latest result; fall back to the original if no result is stored yet.
    source = image_in if counter_out == 1 or image_hid is None else image_hid
    if source is None:
        return _reply(history, prompt, _NO_IMAGE_REPLY, image_hid, img_name, counter_out)

    edited_image = pipe(
        prompt,
        image=source,
        num_inference_steps=int(in_steps),
        guidance_scale=float(in_guidance_scale),
        image_guidance_scale=float(in_img_guidance_scale),
    ).images[0]

    # NOTE(review): earlier chat messages still embed this path in their
    # <img> tag; deleting the file is kept from the original behaviour.
    with suppress(FileNotFoundError):
        os.remove(img_name)

    edited_name = _timestamped_name(img_name)
    edited_image.save(edited_name)
    return _reply(history, prompt, random.choice(_EDIT_REPLIES) + _img_tag(edited_name), edited_image, edited_name, counter_out + 1)
# Banner shown at the top of the page. Kept at column 0 so no line is indented
# far enough for Markdown to treat it as a code block.
intro_text = """<h1><center> Chat Interface with InstructPix2Pix: Give Image Editing Instructions</center></h1>
<p>For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings.<br/>
<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></p>

**Note: Please be advised that a safety checker has been implemented in this public space.
Any attempts to generate inappropriate or NSFW images will result in the display of a black screen
as a precautionary measure to protect all users. We appreciate your cooperation in
maintaining a safe and appropriate environment for all members of our community.**

New features and bug-fixes:

1. Now use 'reverse' as prompt to get back the previous image after an unwanted edit
2. Use 'restart' as prompt to get back to original image and start over!
3. Now you can load larger images (~5 mb) as well
"""

# NOTE(review): the source lost its indentation, so the Column/Row/Accordion
# nesting below is reconstructed from statement order -- confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown(intro_text)

    with gr.Column():
        chatbot = gr.Chatbot()
        state_in = gr.State()

    with gr.Row():
        text_in = gr.Textbox(value='', placeholder="Enter your instructions here")
        btn_upload = gr.UploadButton("Upload image", file_types=["image"], file_count="single")

    with gr.Accordion("Advanced settings for Training and Inference", open=False):
        image_in = gr.Image(visible=False, type='pil', label="Original Image")
        gr.Markdown("Advanced settings for - Number of Inference steps, Guidance scale, and Image guidance scale.")
        in_steps = gr.Number(label="Enter the number of Inference steps", value=20)
        in_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Guidance scale", value=7.5)
        in_img_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Image Guidance scale", value=1.5)
        # Hidden components that carry state between turns.
        image_hid = gr.Image(type='pil', visible=False)
        image_oneup = gr.Image(type='pil', visible=False)
        img_name_temp_out = gr.Textbox(visible=False)
        counter_out = gr.Number(visible=False, value=0, precision=0)

    # Both events call `chat` with the same inputs; they differ only in which
    # image component receives the returned image.
    chat_inputs = [
        btn_upload,
        image_in,
        in_steps,
        in_guidance_scale,
        in_img_guidance_scale,
        image_hid,
        img_name_temp_out,
        counter_out,
        image_oneup,
        text_in,
        state_in,
    ]

    # An upload stores the returned image as the original (image_in).
    btn_upload.upload(
        chat,
        chat_inputs,
        [chatbot, state_in, image_in, img_name_temp_out, counter_out],
    )
    # A submitted instruction stores the returned image as the latest result (image_hid).
    text_in.submit(
        chat,
        chat_inputs,
        [chatbot, state_in, image_hid, img_name_temp_out, counter_out],
    )
    # The same submit also copies the current result into the one-step-back
    # slot that the 'reverse' command restores from.
    text_in.submit(previous, [image_hid], [image_oneup])

    gr.Markdown(help_text)

demo.queue(concurrency_count=10)
demo.launch(debug=True, width="80%", height=2000)