import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoencoderKL
from diffusers import UniPCMultistepScheduler
from PIL import Image
from lpw import _encode_prompt

# ControlNet trained on a combined color-mosaic + Canny-edge condition
controlnet_ColorCanny = ControlNetModel.from_pretrained(
    "ghoskno/Color-Canny-Controlnet-model", torch_dtype=torch.float16
)
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "Lykon/DreamShaper", vae=vae, controlnet=controlnet_ColorCanny, torch_dtype=torch.float16
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Memory optimizations: offload idle submodules to CPU, use xformers attention,
# and slice the attention computation
pipe.enable_model_cpu_offload()
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_attention_slicing()

# Generator seed
generator = torch.manual_seed(0)


def HWC3(x):
    """Convert a uint8 image to 3-channel HWC; composite RGBA over white."""
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    if C == 4:
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        y = color * alpha + 255.0 * (1.0 - alpha)
        y = y.clip(0, 255).astype(np.uint8)
        return y


def resize_image(input_image, resolution, max_edge=False, edge_limit=False):
    """Scale so the max (or min) edge equals `resolution`; optionally pad to a multiple of 64."""
    H, W, C = input_image.shape
    H = float(H)
    W = float(W)
    if max_edge:
        k = float(resolution) / max(H, W)
    else:
        k = float(resolution) / min(H, W)
    H *= k
    W *= k
    H, W = int(H), int(W)
    img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
    if not edge_limit:
        return img
    # Pad up to the next multiple of 64 so the UNet/VAE strides divide evenly.
    # np.ceil (not np.round) guarantees the canvas is never smaller than the
    # image, which would make the centered paste below fail.
    pH = int(np.ceil(H / 64.0)) * 64
    pW = int(np.ceil(W / 64.0)) * 64
    pimg = np.zeros((pH, pW, 3), dtype=img.dtype)
    oH, oW = (pH - H) // 2, (pW - W) // 2
    pimg[oH:oH + H, oW:oW + W] = img
    return pimg


def get_canny_filter(image, format='pil', low_threshold=100, high_threshold=200):
    """Extract Canny edges and replicate them to 3 channels."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    if format == 'pil':
        image = Image.fromarray(image)
    return image


def get_color_filter(cond_image, mask_size=64):
    """Build a coarse color mosaic: downsample, then nearest-neighbor upsample."""
    H, W = cond_image.shape[:2]
    cond_image = cv2.resize(cond_image, (W // mask_size, H // mask_size), interpolation=cv2.INTER_CUBIC)
    color = cv2.resize(cond_image, (W, H), interpolation=cv2.INTER_NEAREST)
    return color


def get_colorcanny(image, mask_size):
    """Compose the conditioning image: white Canny edges over the color mosaic."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    canny_img = get_canny_filter(image, format='np')
    color_img = get_color_filter(image, int(mask_size))
    color_img[np.where(canny_img > 128)] = 255
    color_img = Image.fromarray(color_img)
    return color_img


def process(input_image, prompt, n_prompt, strength=1.0, color_mask_size=96, size=512, scale=6.0, ddim_steps=20):
    # Encode prompts with the long-prompt-weighting helper, which supports
    # prompts beyond CLIP's 77-token limit
    prompt_embeds, negative_prompt_embeds = _encode_prompt(pipe, prompt, pipe.device, 1, True, n_prompt, 3)
    input_image = resize_image(input_image, size, max_edge=True, edge_limit=True)
    cond_img = get_colorcanny(input_image, color_mask_size)
    output = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        image=cond_img,
        generator=generator,
        num_images_per_prompt=1,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        controlnet_conditioning_scale=float(strength),
    )
    return [output.images[0], cond_img]


block = gr.Blocks().queue()
with block:
    gr.Markdown("""
    # Color-Canny-Controlnet
    This is a demo of ControlNet conditioned on Color & Canny.
    """)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(source='upload', type="numpy")
            prompt = gr.Textbox(label="Prompt", value='')
            n_prompt = gr.Textbox(label="Negative Prompt", value='')
            run_button = gr.Button("Run")
            with gr.Accordion('Advanced', open=False):
                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                color_mask_size = gr.Slider(label="Color Mask Size", minimum=32, maximum=256, value=96, step=16)
                size = gr.Slider(label="Size", minimum=256, maximum=768, value=512, step=128)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=6.0, step=0.1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=50, value=20, step=1)
        with gr.Column():
            result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
    ips = [input_image, prompt, n_prompt, strength, color_mask_size, size, scale, ddim_steps]
    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    gr.Examples(
        examples=[
            ["./asserts/1.png", "a concept art by Makoto Shinkai, a girl is standing in the middle of the sea", "text, bad anatomy, blurry, (low quality, blurry)"],
            ["./asserts/2.png", "a concept illustration with saturated vivid watercolors by Erin Hanson and Moebius stylized graphic scene", "text, bad anatomy, blurry, (low quality, blurry)"],
            ["./asserts/3.png", "sky city on the sea, with waves churning and wind power plants on the island", "text, bad anatomy, blurry, (low quality, blurry)"],
        ],
        inputs=[input_image, prompt, n_prompt],
        outputs=result_gallery,
        fn=process,
        cache_examples=True,
    )

block.launch(debug=True, server_name='0.0.0.0')
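
# A minimal sketch of driving `process` directly, without the Gradio UI
# (an assumption for local testing, not part of the demo itself; run it in
# place of `block.launch(...)`, which blocks, and only if an input image
# such as ./asserts/1.png exists):
#
#     img = np.array(Image.open("./asserts/1.png").convert("RGB"))
#     result, cond = process(img, "a girl standing in the middle of the sea",
#                            "text, bad anatomy, blurry, (low quality, blurry)")
#     result.save("output.png")   # generated image
#     cond.save("condition.png")  # color + Canny conditioning image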