import PIL
import requests
import torch
import gradio as gr
import random
from PIL import Image
import os
import time
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Load the InstructPix2Pix pipeline from the Diffusers library.
# Weights are requested in half precision (torch.float16) from the "fp16" revision of the model repo.
# The `safety_checker=None` override is commented out, so the pipeline's default is kept.
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16") #, safety_checker=None)
# Move the whole pipeline to the CUDA device; this runs at import time.
pipe.to("cuda")
#pipe.enable_attention_slicing()
# Use the xformers memory-efficient attention implementation.
pipe.enable_xformers_memory_efficient_attention()
# Switch the UNet to the channels_last memory format.
pipe.unet.to(memory_format=torch.channels_last)


# Markdown shown below the chat UI (rendered with gr.Markdown(help_text)).
# Keep paragraphs and top-level list items flush-left: in Markdown, a line indented
# by four or more spaces after a blank line starts a code block, which would show
# the list (and its ** markers) as literal preformatted text.
help_text = """
**Note: Please be advised that a safety checker has been implemented in this public space.
Any attempts to generate inappropriate or NSFW images will result in the display of a black screen
as a precautionary measure to protect all users. We appreciate your cooperation in
maintaining a safe and appropriate environment for all members of our community.**

New features and bug-fixes:

1. Chat style interface
2. Now use **'reverse'** as prompt to get back the previous image after an unwanted edit
3. Use **'restart'** as prompt to get back to original image and start over!
4. Now you can load larger image files (~5 mb) as well

Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should
be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.

2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the initial image. Image guidance
scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely
linked to the source image `image`, usually at the expense of lower image quality.
3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").
4. Increasing the number of steps sometimes improves results.
5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
    * Cropping the image so the face takes up a larger portion of the frame.
"""

# Inline stylesheet: centers the #col-container column, styles links and the
# footer (including dark-mode variants), and defines a spin animation.
# NOTE(review): this string is not referenced in the visible code — the Blocks
# layout is created with css="style.css" instead. Confirm which one is intended.
css = """
#col-container {max-width: 580px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.footer {
        margin-bottom: 45px;
        margin-top: 10px;
        text-align: center;
        border-bottom: 1px solid #e5e5e5;
    }
    .footer>p {
        font-size: .8rem;
        display: inline-block;
        padding: 0 10px;
        transform: translateY(10px);
        background: white;
    }
    .dark .footer {
        border-color: #303030;
    }
    .dark .footer>p {
        background: #0b0f19;
    }
.animate-spin {
    animation: spin 1s linear infinite;
}
@keyframes spin {
    from {
        transform: rotate(0deg);
    }
    to {
        transform: rotate(360deg);
    }
}
"""


def previous(image):
    """Return *image* unchanged.

    Used as an event handler so the value of one component can be copied
    into another without modification.
    """
    snapshot = image
    return snapshot

def upload_image(file):
    """Open *file* with PIL's ``Image.open`` and return the resulting image."""
    opened = Image.open(file)
    return opened
    
# Short confirmations prepended to every edited image shown in the chat.
_SUPPORTIVE_REPLIES = (
    "There you go",
    "Enjoy your image!",
    "Nice work! Wonder what you gonna do next!",
    "Way to go!",
    "Does this work for you?",
    "Something like this?",
)


def _fit_to_width(image, basewidth=512):
    """Return *image* scaled to *basewidth* pixels wide, keeping its aspect ratio."""
    wpercent = basewidth / float(image.size[0])
    # Never let an extremely wide image collapse to a zero-pixel height.
    hsize = max(1, int(float(image.size[1]) * float(wpercent)))
    return image.resize((basewidth, hsize), Image.Resampling.LANCZOS)


def _image_tag(path):
    """Return the HTML snippet the chatbot uses to display the file at *path*."""
    return '<img src="/file=' + path + '">'


def chat(btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name, counter_out, image_oneup, prompt, history, progress=gr.Progress(track_tqdm=True)):
    """Handle one chat turn: register an upload, edit the image, or undo/restart.

    Behaviour depends on ``prompt`` and ``counter_out``:

    * prompt ``reverse`` -- re-save ``image_oneup`` and show it again.
    * prompt ``restart`` -- resize ``image_in`` to 512px wide and show it again.
    * ``counter_out == 0`` -- open the uploaded file, resize it to 512px wide,
      save it under a random name and show it.
    * otherwise -- run the pipeline on ``image_in`` (when ``counter_out == 1``)
      or on ``image_hid`` (later turns) with ``prompt`` and show the result.

    The two commands are matched case-insensitively, ignoring surrounding
    whitespace.

    Args:
        btn_upload: The uploaded file, passed to ``Image.open`` on the first turn.
        image_in: PIL image of the resized upload.
        in_steps: Number of inference steps (converted with ``int``).
        in_guidance_scale: Text guidance scale (converted with ``float``).
        in_img_guidance_scale: Image guidance scale (converted with ``float``).
        image_hid: PIL image produced by the most recent turn.
        img_name: File name of the most recently saved image.
        counter_out: Number of turns handled so far (0 before the first upload).
        image_oneup: PIL image to go back to on ``reverse``.
        prompt: The user's instruction text.
        history: List of ``(prompt, response)`` tuples, or ``None``.
        progress: Gradio progress tracker.

    Returns:
        A 5-tuple ``(history, history, image, image_file_name, counter_out)``.
        The counter is incremented for uploads and edits and left unchanged
        for ``reverse`` / ``restart``.

    Raises:
        gr.Error: If the image needed for the requested action is missing
            (for example no image has been uploaded yet).
    """
    progress(0, desc="Starting...")
    history = history or []
    command = (prompt or '').strip().lower()

    if command == 'reverse':
        if image_oneup is None:
            raise gr.Error("There is no previous image to go back to yet.")
        temp_img_name = img_name[:-4] + str(int(time.time())) + '.png'
        image_oneup.save(temp_img_name)
        history.append((prompt, 'Reverted to the last image ' + _image_tag(temp_img_name)))
        return history, history, image_oneup, temp_img_name, counter_out

    if command == 'restart':
        if image_in is None:
            raise gr.Error("Please upload an image before asking to restart.")
        temp_img_name = img_name[:-4] + str(int(time.time())) + '.png'
        image_in = _fit_to_width(image_in)
        image_in.save(temp_img_name)
        history.append((prompt, 'Restarted from the original image ' + _image_tag(temp_img_name)))
        return history, history, image_in, temp_img_name, counter_out

    if counter_out == 0:
        # First turn: nothing to edit yet, just register the uploaded image.
        if btn_upload is None:
            raise gr.Error("Please upload an image first.")
        seed = random.randint(0, 1000000)
        img_name = f"./edited_image_{seed}.png"
        image_in = _fit_to_width(Image.open(btn_upload))
        image_in.save(img_name)
        history.append((prompt, _image_tag(img_name)))
        return history, history, image_in, img_name, counter_out + 1

    # The first edit starts from the uploaded image; later edits chain on the
    # previous result.
    source_image = image_in if counter_out == 1 else image_hid
    if source_image is None:
        raise gr.Error("Please upload an image first.")

    # instruct-pix2pix inference
    edited_image = pipe(
        prompt,
        image=source_image,
        num_inference_steps=int(in_steps),
        guidance_scale=float(in_guidance_scale),
        image_guidance_scale=float(in_img_guidance_scale),
    ).images[0]

    # Drop the previous file before writing the new one.
    if os.path.exists(img_name):
        os.remove(img_name)
    temp_img_name = img_name[:-4] + str(int(time.time()))[-4:] + '.png'
    edited_image.save(temp_img_name)

    history.append((prompt, random.choice(_SUPPORTIVE_REPLIES) + _image_tag(temp_img_name)))
    return history, history, edited_image, temp_img_name, counter_out + 1
        

# Blocks layout: a chat window, a prompt box with an upload button, and a
# collapsed panel holding the inference settings plus hidden state components.
# NOTE(review): `css` is given as the path "style.css"; the inline `css` string
# defined in this file is not used here — confirm which one is intended.
with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
                <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
                >
                <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
                    ChatPix2Pix: Image Editing by Instructions
                </h1>
                </div>
                <p style="margin-bottom: 10px; font-size: 94%">
                For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings <a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
                <a href="https://huggingface.co/timbrooks/instruct-pix2pix" target="_blank">Diffusers implementation of instruct-pix2pix</a> - InstructPix2Pix: Learning to Follow Image Editing Instructions!
                </p>
            </div>""")
        #gr.Markdown("""<h1><center>dummy</h1></center> """)
        chatbot = gr.Chatbot()
        # Chat history, carried between calls to chat().
        state_in = gr.State()
        with gr.Row():
            # The keyword must be spelled `placeholder`; the previous misspelling
            # meant the hint text never reached the textbox.
            text_in = gr.Textbox(value='', placeholder="Enter your instructions here")
            btn_upload = gr.UploadButton("Upload image", file_types=["image"], file_count="single")
        with gr.Accordion("Advance settings for Training and Inference", open=False):
            image_in = gr.Image(visible=False, type='pil', label="Original Image")
            gr.Markdown("Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale.")
            in_steps = gr.Number(label="Enter the number of Inference steps", value=20)
            in_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Guidance scale", value=7.5)
            in_img_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Image Guidance scale", value=1.5)
            # Hidden components used as per-session state by chat().
            image_hid = gr.Image(type='pil', visible=False)
            image_oneup = gr.Image(type='pil', visible=False)
            img_name_temp_out = gr.Textbox(visible=False)
            counter_out = gr.Number(visible=False, value=0, precision=0)

    # Both events call chat() with the same inputs; they differ only in which
    # image component receives the returned image (image_in vs image_hid).
    btn_upload.upload(
        chat,
        [btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out, counter_out, image_oneup, text_in, state_in],
        [chatbot, state_in, image_in, img_name_temp_out, counter_out],
    )
    text_in.submit(
        chat,
        [btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out, counter_out, image_oneup, text_in, state_in],
        [chatbot, state_in, image_hid, img_name_temp_out, counter_out],
    )
    # On every submit, also copy image_hid into image_oneup.
    text_in.submit(previous, [image_hid], [image_oneup])
    gr.Markdown(help_text)

demo.queue(concurrency_count=10)
demo.launch(debug=True, width="80%", height=2000)