import gradio as gr import spaces import torch from diffusers import AutoencoderKL, TCDScheduler # (Assume ControlNet manual load or from_pretrained is already working) from controlnet_union import ControlNetModel_Union from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline from gradio_imageslider import ImageSlider from huggingface_hub import hf_hub_download from PIL import Image, ImageDraw import numpy as np # --- Load ControlNet and SDXL Fill Pipeline --- # (Either manual download or via from_pretrained) controlnet_model = ControlNetModel_Union.from_pretrained( "xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16, variant="fp16" ).to("cuda") vae = AutoencoderKL.from_pretrained( "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16 ).to("cuda") pipe = StableDiffusionXLFillPipeline.from_pretrained( "SG161222/RealVisXL_V5.0_Lightning", torch_dtype=torch.float16, vae=vae, controlnet=controlnet_model, variant="fp16", ).to("cuda") pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config) # --- Utility functions --- def can_expand(source_width, source_height, target_width, target_height, alignment): if alignment in ("Left", "Right") and source_width >= target_width: return False if alignment in ("Top", "Bottom") and source_height >= target_height: return False return True def prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom): target = (width, height) scale = min(target[0] / image.width, target[1] / image.height) w, h = int(image.width * scale), int(image.height * scale) src = image.resize((w, h), Image.LANCZOS) # Resize percentage if resize_option == "Full": pct = 100 elif resize_option == "50%": pct = 50 elif resize_option == "33%": pct = 33 elif resize_option == "25%": pct = 25 else: pct = custom_resize_percentage rw, rh = max(int(src.width * pct / 100), 64), max(int(src.height * pct / 100), 64) src = src.resize((rw, rh), Image.LANCZOS) ox = max(int(rw * overlap_percentage / 100), 1) oy = max(int(rh * overlap_percentage / 100), 1) # Margins if alignment == "Middle": mx, my = (width - rw)//2, (height - rh)//2 elif alignment == "Left": mx, my = 0, (height - rh)//2 elif alignment == "Right": mx, my = width - rw, (height - rh)//2 elif alignment == "Top": mx, my = (width - rw)//2, 0 else: mx, my = (width - rw)//2, height - rh mx, my = max(0, min(mx, width - rw)), max(0, min(my, height - rh)) bg = Image.new("RGB", target, (255,255,255)) bg.paste(src, (mx, my)) mask = Image.new("L", target, 255) d = ImageDraw.Draw(mask) lx = mx + (ox if overlap_left else 2) rx = mx + rw - (ox if overlap_right else 2) ty = my + (oy if overlap_top else 2) by = my + rh - (oy if overlap_bottom else 2) # Edge adjustments if alignment == "Left": lx = mx + (ox if overlap_left else 0) if alignment == "Right": rx = mx + rw - (ox if overlap_right else 0) if alignment == "Top": ty = my + (oy if overlap_top else 0) if alignment == "Bottom": by = my + rh - (oy if overlap_bottom else 0) d.rectangle([(lx, ty), (rx, by)], fill=0) return bg, mask def preview_image_and_mask(*args): bg, mask = prepare_image_and_mask(*args) vis = bg.copy().convert("RGBA") red = Image.new("RGBA", bg.size, (255,0,0,64)) overlay = Image.new("RGBA", bg.size, (0,0,0,0)) overlay.paste(red, (0,0), mask) return Image.alpha_composite(vis, overlay) # --- Fixed infer: return list for slider --- @spaces.GPU(duration=24) def infer(image, width, height, overlap_percentage, num_inference_steps, resize_option, custom_resize_percentage, prompt_input, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom): background, mask = prepare_image_and_mask( image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom ) if not can_expand(background.width, background.height, width, height, alignment): alignment = "Middle" hole = background.copy() hole.paste(0, (0,0), mask) final_prompt = f"{prompt_input} , high quality, 4k" embeds = pipe.encode_prompt(final_prompt, "cuda", True) # Run pipeline and grab last frame gen = pipe( prompt_embeds=embeds[0], negative_prompt_embeds=embeds[1], pooled_prompt_embeds=embeds[2], negative_pooled_prompt_embeds=embeds[3], image=hole, num_inference_steps=num_inference_steps ) last = None for img in gen: last = img out = last.convert("RGBA") hole.paste(out, (0,0), mask) # Return a list: [input_hole_image, final_output] return [background, hole] def clear_result(): return gr.update(value=None) def preload_presets(ratio, w, h): if ratio == "9:16": return 720, 1280, gr.update() if ratio == "16:9": return 1280, 720, gr.update() if ratio == "1:1": return 1024, 1024, gr.update() return w, h, gr.update(open=True) def select_the_right_preset(w, h): if (w,h) == (720,1280): return "9:16" if (w,h) == (1280,720): return "16:9" if (w,h) == (1024,1024): return "1:1" return "Custom" def toggle_custom_resize_slider(opt): return gr.update(visible=(opt=="Custom")) def update_history(img, history): history = history or [] history.insert(0, img) return history css = ".gradio-container { width: 1200px !important; }" title = "