import spaces
import gradio as gr
import numpy as np
import torch
from diffusers import DDPMScheduler, StableDiffusionPipeline, DDIMScheduler, UNet2DConditionModel
from diffusers import StableDiffusionInstructPix2PixPipeline, LCMScheduler

# Load the InstructPix2Pix pipeline with float16 weights and place it on CUDA.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix",
    torch_dtype=torch.float16,
).to("cuda")
# Replace the pipeline's scheduler with an LCM scheduler built from the
# configuration of the scheduler it was loaded with.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# Load the LCM-LoRA weights into the pipeline.
adapter_id = "latent-consistency/lcm-lora-sdv1-5"
pipe.load_lora_weights(adapter_id)

# Module-level limits (not referenced by the code visible in this file).
MAX_IMAGE_SIZE = 1024
MAX_SEED = np.iinfo(np.int32).max

@spaces.GPU(duration=30)
def infer(image, edit_instruction, guidance_scale, image_guidance_scale, n_steps):
    """Edit ``image`` according to ``edit_instruction`` with the module-level pipeline.

    Args:
        image: Input image as delivered by the Gradio image component, or
            ``None`` when the user has not provided one.
        edit_instruction: Text instruction passed to the pipeline as ``prompt``.
        guidance_scale: Forwarded unchanged as the pipeline's ``guidance_scale``.
        image_guidance_scale: Forwarded unchanged as ``image_guidance_scale``.
        n_steps: Number of inference steps; converted to ``int`` before use.

    Returns:
        The first image of the pipeline output.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Fail with a readable UI message instead of an opaque pipeline error.
    if image is None:
        raise gr.Error("Please provide an input image.")

    # Gradio's default image format is a uint8 array in [0, 255], whereas
    # diffusers expects NumPy inputs scaled to [0, 1]; rescale so the
    # pipeline's own normalization yields the intended range.
    if isinstance(image, np.ndarray) and image.dtype == np.uint8:
        image = image.astype(np.float32) / 255.0

    output = pipe(
        prompt=edit_instruction,
        image=image,
        # The slider is declared with float bounds; the step count must be an int.
        num_inference_steps=int(n_steps),
        guidance_scale=guidance_scale,
        image_guidance_scale=image_guidance_scale,
    )
    return output.images[0]

# Stylesheet handed to gr.Blocks: centers the "col-container" element and
# caps its width at 1024px.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1024px;
}
"""

# Human-readable device label shown in the page header.
power_device = "GPU" if torch.cuda.is_available() else "CPU"

# Build the page. `css` styles the "col-container" column that wraps all content.
with gr.Blocks(css=css) as demo:
    
    with gr.Column(elem_id="col-container"):
        # Page header; the f-string interpolates `power_device` into the last line.
        # NOTE(review): the header presents this as an iCD demo, while the pipeline
        # loaded in this file is InstructPix2Pix with LCM-LoRA — confirm the copy.
        gr.Markdown(
            f"""
        # ⚡ Invertible Consistency Distillation ⚡ 
        # ⚡ Text-guided image editing with 8-step iCD-SD1.5 ⚡
        This is a demo for [Invertible Consistency Distillation](https://yandex-research.github.io/invertible-cd/), 
        a diffusion distillation method proposed in [Invertible Consistency Distillation for Text-Guided Image Editing in Around 7 Steps](https://arxiv.org/abs/2406.14539)
        by [Yandex Research](https://github.com/yandex-research).
        Currently running on {power_device}
        """
        )
        # Hyperparameter guide, assembled from adjacent string literals.
        # NOTE(review): this text describes tau, crs/srs, amplify and blend
        # settings, but the only controls defined in this file are guidance scale,
        # image guidance scale and inference steps — confirm the text is intended.
        gr.Markdown(
            "**Please** check the examples to catch the intuition behind the hyperparameters, which are quite important for successful editing. A short description: <br />1. *Dynamic guidance tau*. Controls the interval where guidance is applied: if t < tau, then guidance is turned on for t < tau."
            " Lower tau values provide better reference preservation. We commonly use tau=0.6 and tau=0.8. <br />"
            "2. *Cross replace steps (crs)* and *self replace steps (srs)*. Controls the time step interval " 
            "where the cross- and self-attention maps are replaced. Higher values lead to better preservation of the reference image. "
            "The optimal values depend on the particular image. " 
            "Mostly, we use crs and srs from 0.2 to 0.6. <br />"
            "3. *Amplify word* and *Amplify factor*. Define the word that needs to be enhanced in the edited image. <br />"
            "4. *Blended word*. Specifies the object used for making local edits. That is, edit only selected objects. <br />"
            "5. *Is replacement*. You can set True, if you replace only one word in the original prompt. But False also works in these cases."
        )
        # Link to the companion image-generation demo.
        gr.Markdown(
            "Feel free to check out our [image generation demo](https://huggingface.co/spaces/dbaranchuk/iCD-image-generation) as well."
        )
        # Link to the GitHub repository, with a stars badge.
        gr.Markdown(
            "If you enjoy the space, feel free to give a ⭐ to the <a href='https://github.com/yandex-research/invertible-cd' target='_blank'>Github Repo</a>. [![GitHub Stars](https://img.shields.io/github/stars/yandex-research/invertible-cd?style=social)](https://github.com/yandex-research/invertible-cd)"
        )
        with gr.Row():
            # Single-line text box holding the edit instruction passed to `infer`.
            edit_instruction = gr.Text(label="Edit instruction", placeholder="Enter your prompt", max_lines=1)
            
        
        with gr.Row():
            # Input and result images, each in its own column of the same row.
            with gr.Column():
                # type="pil": deliver the upload as a PIL image. Gradio's default is
                # a uint8 NumPy array in [0, 255], whereas diffusers expects NumPy
                # inputs scaled to [0, 1], so the default would be mis-normalized.
                image = gr.Image(label="Input image", type="pil", height=512, width=512, show_label=False)
            with gr.Column():
                result = gr.Image(label="Result", height=512, width=512, show_label=False)

        with gr.Accordion("Advanced Settings", open=True):
            with gr.Row():
                # Passed to `infer` as `guidance_scale`.
                guidance_scale = gr.Slider(minimum=1.0, maximum=8.0, step=1.0, value=2.0, label="guidance scale")

                # Passed to `infer` as `image_guidance_scale`.
                image_guidance_scale = gr.Slider(minimum=1.0, maximum=8.0, step=1.0, value=1.0, label="image guidance scale")

                # Passed to `infer` as `n_steps`.
                n_steps = gr.Slider(minimum=1.0, maximum=10.0, step=1.0, value=4.0, label="inference steps")

        with gr.Row():
            # Button that triggers the edit; its click handler is registered
            # near the end of the file via `run_button.click(...)`.
            run_button = gr.Button("Edit", scale=0)

        with gr.Row():
            # Example rows of 12 fields each; the trailing comment on every field
            # names what it stands for.
            # NOTE(review): `examples` is not consumed by any visible code — the
            # gr.Examples call below is commented out, the component names it lists
            # (input_image, tau, crs, ...) are not defined in this file, and the
            # 12-field rows do not match the 5 inputs wired to `infer`.
            examples = [
                [
                    "examples/orig_3.jpg", #input_image
                    "a photo of a basket of apples", #src_prompt
                    "a photo of a basket of oranges", #tgt_prompt
                    20, #guidance_scale
                    0.6, #tau
                    0.4, #crs
                    0.6, #srs
                    1, #amplify factor
                    'oranges', # amplify word
                    '', #orig blend
                    'oranges', #edited blend
                    False #replacement
                ],
                [
                    "examples/orig_3.jpg", #input_image
                    "a photo of a basket of apples", #src_prompt
                    "a photo of a basket of puppies", #tgt_prompt
                    20, #guidance_scale
                    0.6, #tau
                    0.4, #crs
                    0.1, #srs
                    2, #amplify factor
                    'puppies', # amplify word
                    '', #orig blend
                    'puppies', #edited blend
                    True #replacement
                ],
                [
                    "examples/orig_3.jpg", #input_image
                    "a photo of a basket of apples", #src_prompt
                    "a photo of a basket of apples under snowfall", #tgt_prompt
                    20, #guidance_scale
                    0.6, #tau
                    0.4, #crs
                    0.4, #srs
                    30, #amplify factor
                    'snowfall', # amplify word
                    '', #orig blend
                    'snowfall', #edited blend
                    False #replacement
                ],
                [
                    "examples/orig_1.jpg", #input_image
                    "a photo of an owl", #src_prompt
                    "a photo of an yellow owl", #tgt_prompt
                    20, #guidance_scale
                    0.6, #tau
                    0.9, #crs
                    0.9, #srs
                    20, #amplify factor
                    'yellow', # amplify word
                    'owl', #orig blend
                    'yellow', #edited blend
                    False #replacement
                ],
               [
                    "examples/orig_1.jpg", #input_image
                    "a photo of an owl", #src_prompt
                    "an anime-style painting of an owl", #tgt_prompt
                    20, #guidance_scale
                    0.8, #tau
                    0.6, #crs
                    0.3, #srs
                    10, #amplify factor
                    'anime-style', # amplify word
                    'painting', #orig blend
                    'anime-style', #edited blend
                    False #replacement
                ],
                [
                    "examples/orig_1.jpg", #input_image
                    "a photo of an owl", #src_prompt
                    "a photo of an owl underwater with many fishes nearby", #tgt_prompt
                    20, #guidance_scale
                    0.8, #tau
                    0.4, #crs
                    0.4, #srs
                    18, #amplify factor
                    'fishes', # amplify word
                    '', #orig blend
                    'fishes', #edited blend
                    False #replacement
                ],
                [
                    "examples/orig_2.jpg", #input_image
                    "a photograph of a teddy bear sitting on a wall", #src_prompt
                    "a photograph of a teddy bear sitting on a wall surrounded by roses", #tgt_prompt
                    20, #guidance_scale
                    0.6, #tau
                    0.4, #crs
                    0.1, #srs
                    25, #amplify factor
                    'roses', # amplify word
                    '', #orig blend
                    'roses', #edited blend
                    False #replacement
                ],
                [
                    "examples/orig_2.jpg", #input_image
                    "a photograph of a teddy bear sitting on a wall", #src_prompt
                    "a photograph of a wooden bear sitting on a wall", #tgt_prompt
                    20, #guidance_scale
                    0.8, #tau
                    0.5, #crs
                    0.5, #srs
                    14, #amplify factor
                    'wooden', # amplify word
                    '', #orig blend
                    'wooden', #edited blend
                    True #replacement
                ],
                [
                    "examples/orig_2.jpg", #input_image
                    "a photograph of a teddy bear sitting on a wall", #src_prompt
                    "a photograph of a teddy rabbit sitting on a wall", #tgt_prompt
                    20, #guidance_scale
                    0.8, #tau
                    0.4, #crs
                    0.4, #srs
                    3, #amplify factor
                    'rabbit', # amplify word
                    '', #orig blend
                    'rabbit', #edited blend
                    True #replacement
                ],
            ]
  
            #gr.Examples(
            #   examples = examples,
            #   inputs =[input_image, input_prompt, prompt,
            #    guidance_scale, tau, crs, srs, amplify_factor, amplify_word,
            #    blend_orig, blend_edited, is_replacement],
            #   outputs=[
            #            result
            #            ],
            #   fn=infer, cache_examples=True
            #)

    # Run `infer` on click: the listed components supply its arguments in
    # order, and its return value is shown in `result`.
    run_button.click(
        infer,
        inputs=[image, edit_instruction, guidance_scale, image_guidance_scale, n_steps],
        outputs=[result],
    )

# Enable the request queue, then start serving the app.
demo.queue()
demo.launch()