import os
# os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
# os.environ['CUDA_VISIBLE_DEVICES'] = '2'
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "caching_allocator"

import base64
import io
import json

import cv2
import gradio as gr
import numpy as np
import onnxruntime
import torch
from diffusers import LDMSuperResolutionPipeline, StableDiffusionUpscalePipeline
# from diffusers import StableDiffusionLatentUpscalePipeline  # x2 latent upscaler alternative, see upscale()
from PIL import Image
from transformers import pipeline
from xformers.ops import MemoryEfficientAttentionFlashAttentionOp

import utils
from models import make_inpainting


def removeFurniture(input_img1,
                    input_img2,
                    positive_prompt,
                    negative_prompt,
                    num_of_images,
                    resolution):
    print("removeFurniture")
    # gr.Number delivers floats; PIL resize and the pipelines expect ints.
    resolution = int(resolution)
    num_of_images = int(num_of_images)

    input_img1 = input_img1.resize((resolution, resolution))
    input_img2 = input_img2.resize((resolution, resolution))

    # Turn the drawn mask canvas into a binary inpainting mask.
    canvas_mask = np.array(input_img2)
    mask = utils.get_mask(canvas_mask)

    print(input_img1, mask, positive_prompt, negative_prompt)
    retList = make_inpainting(positive_prompt=positive_prompt,
                              image=input_img1,
                              mask_image=mask,
                              negative_prompt=negative_prompt,
                              num_of_images=num_of_images,
                              resolution=resolution)

    # Pad to 10 entries so the list matches the 10 gr.Image outputs.
    while len(retList) < 10:
        retList.append(None)
    return retList


def imageToString(img):
    # Serialize a PIL image to PNG bytes (helper, not wired to the UI).
    output = io.BytesIO()
    img.save(output, format="png")
    return output.getvalue()


def segmentation(img):
    print("segmentation")
    # pipe = pipeline("image-segmentation", "nvidia/segformer-b1-finetuned-cityscapes-1024-1024")
    pipe = pipeline("image-segmentation", "facebook/maskformer-swin-large-ade")
    results = pipe(img)
    # Base64-encode each mask so the result is JSON-serializable.
    for p in results:
        p['mask'] = utils.image_to_byte_array(p['mask'])
        p['mask'] = base64.b64encode(p['mask']).decode("utf-8")
    return json.dumps(results)


def upscale(image, prompt):
    print("upscale", image, prompt)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("device", device)

    # float16 weights only make sense on GPU; fall back to float32 on CPU.
    pipe = StableDiffusionUpscalePipeline.from_pretrained(
        "stabilityai/stable-diffusion-x4-upscaler",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32)
    # pipe = StableDiffusionLatentUpscalePipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16)
    pipe = pipe.to(device)
    pipe.enable_attention_slicing()
    if device == "cuda":
        pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
        # Workaround: the VAE does not accept the attention shape used by Flash Attention.
        pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)

    ret = pipe(prompt=prompt, image=image, num_inference_steps=10, guidance_scale=0)
    print("ret", ret)
    upscaled_image = ret.images[0]
    print("up", upscaled_image)
    return upscaled_image


def upscale2(image, prompt):
    print("upscale2", image, prompt)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("device", device)

    pipe = LDMSuperResolutionPipeline.from_pretrained(
        "CompVis/ldm-super-resolution-4x-openimages",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32)
    pipe = pipe.to(device)
    pipe.enable_attention_slicing()

    upscaled_image = pipe(image, num_inference_steps=10, eta=1).images[0]
    return upscaled_image


def convert_pil_to_cv2(image):
    open_cv_image = np.array(image)
    # RGB -> BGR (OpenCV channel order)
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image
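
# The local `utils` module is not included in this file. Below is a minimal
# sketch of the two helpers it is assumed to provide, inferred from the call
# sites above; the project's actual implementations may differ.
#
#   def get_mask(canvas: np.ndarray) -> Image.Image:
#       # Treat every non-black pixel on the drawn canvas as masked (white).
#       gray = canvas[..., :3].sum(axis=-1)
#       return Image.fromarray(np.where(gray > 0, 255, 0).astype(np.uint8))
#
#   def image_to_byte_array(img: Image.Image) -> bytes:
#       buf = io.BytesIO()
#       img.save(buf, format="PNG")   # same idea as imageToString() above
#       return buf.getvalue()
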
def inference(model_path: str, img_array: np.ndarray) -> np.ndarray:
    options = onnxruntime.SessionOptions()
    options.intra_op_num_threads = 1
    options.inter_op_num_threads = 1
    ort_session = onnxruntime.InferenceSession(model_path, options)
    ort_inputs = {ort_session.get_inputs()[0].name: img_array}
    ort_outs = ort_session.run(None, ort_inputs)
    return ort_outs[0]


def post_process(img: np.ndarray) -> np.ndarray:
    # 1, C, H, W -> C, H, W
    img = np.squeeze(img)
    # C, H, W -> H, W, C, and BGR -> RGB
    img = np.transpose(img, (1, 2, 0))[:, :, ::-1].astype(np.uint8)
    return img


def pre_process(img: np.ndarray) -> np.ndarray:
    # H, W, C -> C, H, W (dropping any alpha channel)
    img = np.transpose(img[:, :, 0:3], (2, 0, 1))
    # C, H, W -> 1, C, H, W
    img = np.expand_dims(img, axis=0).astype(np.float32)
    return img


def upscale3(image):
    print("upscale3", image)
    model_path = "up_models/modelx4.ort"
    img = convert_pil_to_cv2(image)
    # Grayscale and RGBA inputs are not handled; the disabled branches below
    # sketch how they could be converted, with the alpha channel upscaled separately.
    # if img.ndim == 2:
    #     img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    # if img.shape[2] == 4:
    #     alpha = img[:, :, 3]                                                    # GRAY
    #     alpha = cv2.cvtColor(alpha, cv2.COLOR_GRAY2BGR)                         # BGR
    #     alpha_output = post_process(inference(model_path, pre_process(alpha)))  # RGB
    #     alpha_output = cv2.cvtColor(alpha_output, cv2.COLOR_RGB2GRAY)           # GRAY
    #     img = img[:, :, 0:3]                                                    # BGR
    #     image_output = post_process(inference(model_path, pre_process(img)))    # RGB
    #     image_output = cv2.cvtColor(image_output, cv2.COLOR_RGB2RGBA)           # RGBA
    #     image_output[:, :, 3] = alpha_output
    #     return image_output
    image_output = post_process(inference(model_path, pre_process(img)))  # RGB
    return image_output


with gr.Blocks() as app:
    with gr.Row():
        with gr.Column():
            gr.Button("Remove Furniture").click(
                removeFurniture,
                inputs=[gr.Image(label="img", type="pil"),
                        gr.Image(label="mask", type="pil"),
                        gr.Textbox(label="positive_prompt", value="empty room"),
                        gr.Textbox(label="negative_prompt", value=""),
                        gr.Number(label="num_of_images", value=2),
                        gr.Number(label="resolution", value=512)],
                outputs=[gr.Image() for _ in range(10)])
        with gr.Column():
            gr.Button("Segmentation").click(segmentation,
                                            inputs=gr.Image(type="pil"),
                                            outputs=gr.JSON())
        with gr.Column():
            gr.Button("Upscale").click(upscale,
                                       inputs=[gr.Image(type="pil"),
                                               gr.Textbox(label="prompt", value="empty room")],
                                       outputs=gr.Image())
        with gr.Column():
            gr.Button("Upscale2").click(upscale2,
                                        inputs=[gr.Image(type="pil"),
                                                gr.Textbox(label="prompt", value="empty room")],
                                        outputs=gr.Image())
        with gr.Column():
            gr.Button("Upscale3").click(upscale3,
                                        inputs=[gr.Image(type="pil")],
                                        outputs=gr.Image())

app.launch(debug=True, share=True)
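
# Example of invoking the ONNX x4 upscaler directly, bypassing the UI. The
# input file name is hypothetical; the model is expected at up_models/modelx4.ort:
#
#   out = upscale3(Image.open("room.png"))       # H, W, C uint8, RGB
#   cv2.imwrite("room_x4.png", out[:, :, ::-1])  # flip RGB -> BGR for OpenCV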