Spaces:

ysharma
/

InstructPix2Pix_Chatbot

Paused

App Files Files Community

InstructPix2Pix_Chatbot / app.py

ysharma HF Staff

update the layout to chat style

6e0790c almost 3 years ago

raw

history blame

9.42 kB

	import PIL
	import requests
	import torch
	import gradio as gr
	import random
	from PIL import Image
	import os
	import time
	from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

	# Load the InstructPix2Pix pipeline from the Diffusers library.
	# The weights are requested in half precision (torch.float16, "fp16" revision).
	model_id = "timbrooks/instruct-pix2pix"
	# The trailing commented-out argument shows how the safety checker could be
	# disabled; it is left enabled here.
	pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16") #, safety_checker=None)
	# Move the whole pipeline to the CUDA device; this module requires a GPU at import time.
	pipe.to("cuda")
	# Attention slicing is kept as a disabled alternative to the xformers call below.
	#pipe.enable_attention_slicing()
	pipe.enable_xformers_memory_efficient_attention()
	# Switch the UNet to the channels_last memory format.
	pipe.unet.to(memory_format=torch.channels_last)


	# Markdown help text rendered at the bottom of the UI via gr.Markdown(help_text).
	# It is user-facing runtime text, so edit the wording with care.
	help_text = """ Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

	If you're not getting what you want, there may be a few reasons:
	1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images
	that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should
	be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.

	2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance
	scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely
	linked to the source image `image`, usually at the expense of lower image quality.

	3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").

	4. Increasing the number of steps sometimes improves results.

	5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
	* Cropping the image so the face takes up a larger portion of the frame.
	"""

	def previous(image):
	    """Hand back *image* untouched.

	    Wired to the text box submit event so the current hidden image is
	    copied into the "one step back" image component.
	    """
	    unchanged = image
	    return unchanged

	def upload_image(file):
	    """Open *file* with PIL and return the resulting image object.

	    *file* is passed straight to ``Image.open``.
	    """
	    opened_image = Image.open(file)
	    return opened_image

	def _fit_to_width(image, width=512):
	    """Return *image* scaled to *width* pixels wide, keeping its aspect ratio."""
	    scale = width / float(image.size[0])
	    # Clamp so an extremely wide image cannot collapse to a zero-pixel height.
	    height = max(1, int(float(image.size[1]) * scale))
	    return image.resize((width, height), Image.Resampling.LANCZOS)


	def _next_image_name(img_name):
	    """Return a fresh timestamped PNG path derived from *img_name*.

	    A timestamp added by an earlier call is stripped first, so the name
	    keeps a constant length instead of growing with every edit.
	    """
	    stem = os.path.splitext(img_name)[0]
	    base, sep, stamp = stem.rpartition('-')
	    if sep and stamp.isdigit():
	        stem = base
	    return f"{stem}-{int(time.time())}.png"


	def _save_png(image, path):
	    """Write *image* to *path* as PNG and return *path*."""
	    with open(path, "wb") as fp:
	        image.save(fp, format="PNG")
	    return path


	def _image_tag(path):
	    """Return the HTML snippet that shows the file at *path* in the chatbot."""
	    return '<img src="/file=' + path + '">'


	def chat(btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name, counter_out, image_oneup, prompt, history, progress=gr.Progress(track_tqdm=True)):
	    """Handle one chat turn: an image upload, an edit instruction, or a command.

	    Args:
	        btn_upload: File delivered by the upload button (None until used).
	        image_in: The uploaded (resized) image; source of the first edit.
	        in_steps: Number of inference steps for the pipeline.
	        in_guidance_scale: Text guidance scale for the pipeline.
	        in_img_guidance_scale: Image guidance scale for the pipeline.
	        image_hid: Latest edited image; source of every edit after the first.
	        img_name: Path of the most recently saved image file.
	        counter_out: 0 before an image is uploaded, 1 right after the
	            upload, and incremented after every edit.
	        image_oneup: Image from one step back, restored by 'reverse'.
	        prompt: Edit instruction, or the command 'reverse' / 'restart'.
	        history: List of (prompt, response) tuples shown in the chatbot.
	        progress: Gradio progress tracker.

	    Returns:
	        A tuple ``(history, history, image, image_path, counter)`` matching
	        the outputs wired in the Blocks layout. When a request cannot be
	        served (nothing uploaded yet, nothing to revert to) the image slot
	        is ``gr.update()`` so the bound image component is left unchanged.
	    """
	    progress(0, desc="Starting...")
	    history = history or []
	    prompt = prompt or ''
	    command = prompt.strip().lower()

	    if command == 'reverse':
	        if image_oneup is None:
	            # No earlier image exists; explain instead of crashing on None.save().
	            history.append((prompt, 'There is no previous image to go back to yet.'))
	            return history, history, gr.update(), img_name, counter_out
	        reverted_name = _next_image_name(img_name)
	        image_oneup.save(reverted_name)
	        history.append((prompt, 'Reverted to the last image ' + _image_tag(reverted_name)))
	        return history, history, image_oneup, reverted_name, counter_out

	    if command == 'restart':
	        if image_in is None:
	            history.append((prompt, 'Please upload an image first.'))
	            return history, history, gr.update(), img_name, counter_out
	        original = _fit_to_width(image_in)
	        restarted_name = _save_png(original, _next_image_name(img_name))
	        history.append((prompt, 'Restarted from the original image ' + _image_tag(restarted_name)))
	        return history, history, original, restarted_name, counter_out

	    if counter_out == 0:
	        if btn_upload is None:
	            # Text was submitted before any upload; there is nothing to edit.
	            history.append((prompt, 'Please upload an image before sending instructions.'))
	            return history, history, gr.update(), img_name, counter_out
	        seed = random.randint(0, 1000000)
	        img_name = f"./edited_image_{seed}.png"
	        # Convert the uploaded file object to an image and normalise its width.
	        uploaded = _fit_to_width(Image.open(btn_upload))
	        _save_png(uploaded, img_name)
	        history.append((prompt, _image_tag(img_name)))
	        return history, history, uploaded, img_name, counter_out + 1

	    # Supportive sample text prepended to every edited image.
	    add_text_list = ["There you go", "Enjoy your image!", "Nice work! Wonder what you gonna do next!", "Way to go!", "Does this work for you?", "Something like this?"]
	    # The first edit starts from the upload; later edits chain on the last result.
	    source_image = image_in if counter_out == 1 else image_hid
	    # instruct-pix2pix inference
	    edited_image = pipe(
	        prompt,
	        image=source_image,
	        num_inference_steps=int(in_steps),
	        guidance_scale=float(in_guidance_scale),
	        image_guidance_scale=float(in_img_guidance_scale),
	    ).images[0]
	    # Remove the previous step's file (presumably to keep the working
	    # directory from accumulating images).
	    if os.path.exists(img_name):
	        os.remove(img_name)
	    edited_name = _save_png(edited_image, _next_image_name(img_name))
	    history.append((prompt, random.choice(add_text_list) + _image_tag(edited_name)))
	    return history, history, edited_image, edited_name, counter_out + 1


	# Blocks layout: a chatbot with a prompt box and upload button, plus hidden
	# components that carry per-session state between calls to chat().
	with gr.Blocks() as demo:
	    gr.Markdown("""<h1><center> Chat Interface with InstructPix2Pix: Give Image Editing Instructions</h1></center>
	<p>For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings.<br/>
	<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true">
	<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>

	**Note: Please be advised that a safety checker has been implemented in this public space.
	Any attempts to generate inappropriate or NSFW images will result in the display of a black screen
	as a precautionary measure to protect all users. We appreciate your cooperation in
	maintaining a safe and appropriate environment for all members of our community.**

	New features and bug-fixes:

	1. Now use 'reverse' as prompt to get back the previous image after an unwanted edit
	2. Use 'restart' as prompt to get back to original image and start over!
	3. Now you can load larger images (~5 mb) as well

	<p/>""")
	    with gr.Column():
	        chatbot = gr.Chatbot()
	        state_in = gr.State()
	        with gr.Row():
	            # The keyword is `placeholder`; the misspelled original was
	            # never applied, so the hint text did not show.
	            text_in = gr.Textbox(value='', placeholder="Enter your instructions here")
	            btn_upload = gr.UploadButton("Upload image", file_types=["image"], file_count="single")
	        with gr.Accordion("Advance settings for Training and Inference", open=False):
	            # Hidden holder for the uploaded (resized) image.
	            image_in = gr.Image(visible=False,type='pil', label="Original Image")
	            gr.Markdown("Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale.")
	            in_steps = gr.Number(label="Enter the number of Inference steps", value = 20)
	            in_guidance_scale = gr.Slider(1,10, step=0.5, label="Set Guidance scale", value=7.5)
	            in_img_guidance_scale = gr.Slider(1,10, step=0.5, label="Set Image Guidance scale", value=1.5)
	            # Hidden state: latest edited image, the image one step back,
	            # the last saved file name and the turn counter.
	            image_hid = gr.Image(type='pil', visible=False)
	            image_oneup = gr.Image(type='pil', visible=False)
	            img_name_temp_out = gr.Textbox(visible=False)
	            counter_out = gr.Number(visible=False, value=0, precision=0)

	    # An upload stores its result in image_in; a text submit stores it in image_hid.
	    btn_upload.upload(chat,
	        [btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out,counter_out, image_oneup, text_in, state_in],
	        [chatbot, state_in, image_in, img_name_temp_out, counter_out])
	    text_in.submit(chat,[btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out,counter_out, image_oneup, text_in, state_in], [chatbot, state_in, image_hid, img_name_temp_out, counter_out])
	    # Second listener on the same submit: copies image_hid into image_oneup,
	    # the image that the 'reverse' command restores.
	    text_in.submit(previous, [image_hid], [image_oneup])
	    gr.Markdown(help_text)

	demo.queue(concurrency_count=10)
	demo.launch(debug=True, width="80%", height=2000)