import random

import torch
import gradio as gr
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Load the InstructPix2Pix pipeline from the Diffusers library
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16", safety_checker=None)
pipe.to("cuda")
# The official Diffusers example pairs this pipeline with the Euler Ancestral scheduler
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
# Trade some speed for lower VRAM usage
pipe.enable_attention_slicing()

counter = 0


help_text = """ Note: I will try to add the functionality to revert your changes to previous/original image in future versions of space. For now only forward editing is available.

From the official Space by the authors [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) 
and from official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images 
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should 
be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image. 
                
2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance 
                scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely 
                linked to the source image `image`, usually at the expense of lower image quality.  

3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").

4. Increasing the number of steps sometimes improves results.

5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
    * Cropping the image so the face takes up a larger portion of the frame.
"""

def chat(image_in, message, history, progress=gr.Progress(track_tqdm=True)):
    progress(0, desc="Starting...")
    # NOTE: `counter` and the saved file are shared globals, so concurrent
    # users will overwrite each other's edits.
    global counter
    counter += 1
    # After the first edit, keep editing the previously saved output so that
    # successive instructions chain together
    if counter > 1:
        image_in = Image.open("edited_image.png")
    prompt = message  # e.g. "turn him into a cyborg"
    edited_image = pipe(prompt, image=image_in, num_inference_steps=20, image_guidance_scale=1).images[0]
    edited_image.save("edited_image.png")
    history = history or []
    add_text_list = ["There you go ", "Enjoy your image! ", "Nice work! Wonder what you'll do next! ", "Way to go! ", "Does this work for you? ", "Something like this? "]
    # Embed the edited image in the chat reply, resized for better display
    response = random.choice(add_text_list) + '<img src="/file=edited_image.png" style="width: 200px; height: 200px;">'
    history.append((message, response))
    return history, history
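
# A possible shape for the revert feature promised in help_text (a sketch only,
# not wired into `chat` above; `edit_stack` is a hypothetical name):
#
#     edit_stack = ["original_image.png"]   # filenames of saved edits, oldest first
#     ...
#     if message == "revert" and len(edit_stack) > 1:
#         edit_stack.pop()                  # drop the latest edit
#         image_in = Image.open(edit_stack[-1])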

with gr.Blocks() as demo:
    gr.Markdown("""<h1><center>Chat Interface with InstructPix2Pix: Give Image Editing Instructions</center></h1>
    <p>For faster inference without waiting in the queue, you may duplicate the Space and upgrade to a GPU in settings.<br/>
    <a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true">
    <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
    </p>""")
    with gr.Row():
        with gr.Column():
            image_in = gr.Image(type='pil', label="Original Image")
            text_in = gr.Textbox()
            state_in = gr.State()
            b1 = gr.Button('Edit the image!')
        chatbot = gr.Chatbot()
    b1.click(chat, [image_in, text_in, state_in], [chatbot, state_in])
    gr.Markdown(help_text)
    
demo.queue(concurrency_count=10) 
demo.launch(debug=True, width="80%", height=1500)