import gradio as gr
import torch
import numpy as np
from diffusers import StableDiffusionXLInpaintPipeline
from PIL import Image, ImageDraw
from transformers import DetrImageProcessor, DetrForObjectDetection
import spaces

device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the Stable Diffusion XL inpainting pipeline
pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    variant="fp16",
).to(device)
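# Note: "stabilityai/stable-diffusion-xl-base-1.0" is the base text-to-image checkpoint.
# diffusers can run the inpainting pipeline with it, but a dedicated inpainting checkpoint
# such as "diffusers/stable-diffusion-xl-1.0-inpainting-0.1" typically preserves unmasked
# regions better; swapping it in is an optional change, not part of the original app.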
# Load the DETR object detection model
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
detector = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(device)
@spaces.GPU  # request GPU on ZeroGPU Spaces (assumed intent of the otherwise unused `spaces` import)
def detect_and_replace_humans(input_image, prompt):
    # The prompt box is optional (a predefined prompt is used below), so only a missing image aborts
    if input_image is None:
        return None
    image_np = np.array(input_image)

    # Run DETR object detection without tracking gradients
    inputs = processor(images=input_image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = detector(**inputs)
    target_sizes = torch.tensor([image_np.shape[:2]]).to(device)
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
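    # post_process_object_detection returns boxes as (xmin, ymin, xmax, ymax) pixel
    # coordinates, keeping only detections scoring above the 0.9 confidence threshold.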
    # Build a mask with white rectangles over every detected person
    mask = Image.new("L", input_image.size, 0)
    draw = ImageDraw.Draw(mask)
    found = False
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        if detector.config.id2label[label.item()] == "person":
            box = [int(i) for i in box.tolist()]
            draw.rectangle(box, fill=255)
            found = True
    if not found:
        # Raise a Gradio error instead of returning a string, which an Image output cannot display
        raise gr.Error("No human detected.")
    # Pre-defined positive and negative prompts
    positive_prompt = (
        "Replace the masked humans with imaginary Indian bride and groom wearing traditional Indian wedding attire, "
        "with detailed embroidery, colorful saree and sherwani, realistic faces, natural skin texture, matching pose, "
        "perfect lighting, and the same camera perspective. Keep the background unchanged."
    )
    negative_prompt = (
        "blurry, distorted, deformed, double face, extra limbs, low quality, bad proportions, low resolution, "
        "changed background, multiple faces, duplicate body parts, cartoon, watermark, text"
    )
    # Inpainting process
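    # num_inference_steps=40 and guidance_scale=8.5 trade speed for prompt adherence;
    # both values are tunable choices, not requirements of the pipeline.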
    output = pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        image=input_image,
        mask_image=mask,
        num_inference_steps=40,
        guidance_scale=8.5,
    ).images[0]
    return output
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Replace Humans with Imaginary Indian Bride and Groom (Background Preserved)")
    with gr.Row():
        input_image = gr.Image(type="pil", label="Input Image")
        output_image = gr.Image(type="pil", label="Output Image")
    prompt_text = gr.Textbox(label="Prompt (Optional, Predefined Prompt Used)", placeholder="You can leave this blank")
    submit = gr.Button("Submit")
    submit.click(detect_and_replace_humans, inputs=[input_image, prompt_text], outputs=output_image)

demo.launch()
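# Assumed dependencies for requirements.txt or a local run (versions unpinned):
# gradio, torch, numpy, diffusers, transformers, Pillow, spaces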