import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoencoderKL
from diffusers import UniPCMultistepScheduler
from PIL import Image
from lpw import _encode_prompt

# ControlNet trained on a combined color-mosaic + Canny-edge condition
controlnet_ColorCanny = ControlNetModel.from_pretrained(
    "ghoskno/Color-Canny-Controlnet-model", torch_dtype=torch.float16
)
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "Lykon/DreamShaper", vae=vae, controlnet=controlnet_ColorCanny, torch_dtype=torch.float16
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Memory optimizations: offload idle submodules to CPU, use xformers attention,
# and slice the attention computation
pipe.enable_model_cpu_offload()
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_attention_slicing()

# Generator seed
generator = torch.manual_seed(0)


def HWC3(x):
    """Convert a uint8 image to 3-channel HWC; composite RGBA over white."""
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    if C == 4:
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        y = color * alpha + 255.0 * (1.0 - alpha)
        y = y.clip(0, 255).astype(np.uint8)
        return y


def resize_image(input_image, resolution, max_edge=False, edge_limit=False):
    """Scale so the max (or min) edge equals `resolution`; optionally pad to a multiple of 64."""
    H, W, C = input_image.shape
    H = float(H)
    W = float(W)
    if max_edge:
        k = float(resolution) / max(H, W)
    else:
        k = float(resolution) / min(H, W)
    H *= k
    W *= k
    H, W = int(H), int(W)
    img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
    if not edge_limit:
        return img
    # Pad up to the next multiple of 64 so the UNet/VAE strides divide evenly.
    # np.ceil (not np.round) guarantees the canvas is never smaller than the
    # image, which would make the centered paste below fail.
    pH = int(np.ceil(H / 64.0)) * 64
    pW = int(np.ceil(W / 64.0)) * 64
    pimg = np.zeros((pH, pW, 3), dtype=img.dtype)
    oH, oW = (pH - H) // 2, (pW - W) // 2
    pimg[oH:oH + H, oW:oW + W] = img
    return pimg


def get_canny_filter(image, format='pil', low_threshold=100, high_threshold=200):
    """Extract Canny edges and replicate them to 3 channels."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    if format == 'pil':
        image = Image.fromarray(image)
    return image


def get_color_filter(cond_image, mask_size=64):
    """Build a coarse color mosaic: downsample, then nearest-neighbor upsample."""
    H, W = cond_image.shape[:2]
    cond_image = cv2.resize(cond_image, (W // mask_size, H // mask_size), interpolation=cv2.INTER_CUBIC)
    color = cv2.resize(cond_image, (W, H), interpolation=cv2.INTER_NEAREST)
    return color


def get_colorcanny(image, mask_size):
    """Compose the conditioning image: white Canny edges over the color mosaic."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    canny_img = get_canny_filter(image, format='np')
    color_img = get_color_filter(image, int(mask_size))
    color_img[np.where(canny_img > 128)] = 255
    color_img = Image.fromarray(color_img)
    return color_img


def process(input_image, prompt, n_prompt, strength=1.0, color_mask_size=96, size=512, scale=6.0, ddim_steps=20):
    # Encode prompts with the long-prompt-weighting helper, which supports
    # prompts beyond CLIP's 77-token limit
    prompt_embeds, negative_prompt_embeds = _encode_prompt(pipe, prompt, pipe.device, 1, True, n_prompt, 3)
    input_image = resize_image(input_image, size, max_edge=True, edge_limit=True)
    cond_img = get_colorcanny(input_image, color_mask_size)
    output = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        image=cond_img,
        generator=generator,
        num_images_per_prompt=1,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        controlnet_conditioning_scale=float(strength),
    )
    return [output.images[0], cond_img]


block = gr.Blocks().queue()
with block:
    gr.Markdown("""
    # Color-Canny-Controlnet
    This is a demo of ControlNet conditioned on Color & Canny.
    """)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(source='upload', type="numpy")
            prompt = gr.Textbox(label="Prompt", value='')
            n_prompt = gr.Textbox(label="Negative Prompt", value='')
            run_button = gr.Button("Run")
            with gr.Accordion('Advanced', open=False):
                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                color_mask_size = gr.Slider(label="Color Mask Size", minimum=32, maximum=256, value=96, step=16)
                size = gr.Slider(label="Size", minimum=256, maximum=768, value=512, step=128)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=6.0, step=0.1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=50, value=20, step=1)
        with gr.Column():
            result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
    ips = [input_image, prompt, n_prompt, strength, color_mask_size, size, scale, ddim_steps]
    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    gr.Examples(
        examples=[
            ["./asserts/1.png", "a concept art by Makoto Shinkai, a girl is standing in the middle of the sea", "text, bad anatomy, blurry, (low quality, blurry)"],
            ["./asserts/2.png", "a concept illustration with saturated vivid watercolors by Erin Hanson and Moebius stylized graphic scene", "text, bad anatomy, blurry, (low quality, blurry)"],
            ["./asserts/3.png", "sky city on the sea, with waves churning and wind power plants on the island", "text, bad anatomy, blurry, (low quality, blurry)"],
        ],
        inputs=[input_image, prompt, n_prompt],
        outputs=result_gallery,
        fn=process,
        cache_examples=True,
    )

block.launch(debug=True, server_name='0.0.0.0')
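
# A minimal sketch of driving `process` directly, without the Gradio UI
# (an assumption for local testing, not part of the demo itself; run it in
# place of `block.launch(...)`, which blocks, and only if an input image
# such as ./asserts/1.png exists):
#
#     img = np.array(Image.open("./asserts/1.png").convert("RGB"))
#     result, cond = process(img, "a girl standing in the middle of the sea",
#                            "text, bad anatomy, blurry, (low quality, blurry)")
#     result.save("output.png")   # generated image
#     cond.save("condition.png")  # color + Canny conditioning image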