Commit 5b2ab1c
Parent(s): 8f5320e
Initial commit

Files changed:
- app.py +23 -0
- examples/notebooks/demo_controlnet.ipynb +0 -0
- examples/notebooks/demo_sam.ipynb +0 -0
- requirements.txt +4 -0
- src/designgenie/__init__.py +1 -0
- src/designgenie/data/__init__.py +1 -0
- src/designgenie/data/image_folder.py +69 -0
- src/designgenie/interfaces/__init__.py +1 -0
- src/designgenie/interfaces/gradio_interface.py +240 -0
- src/designgenie/models/__init__.py +6 -0
- src/designgenie/models/diffusion/__init__.py +15 -0
- src/designgenie/models/diffusion/controlnet.py +160 -0
- src/designgenie/models/diffusion/controlnet_inpaint.py +125 -0
- src/designgenie/models/segmentation/__init__.py +16 -0
- src/designgenie/models/segmentation/maskformer.py +122 -0
- src/designgenie/pipelines/__init__.py +1 -0
- src/designgenie/pipelines/inpaint_pipeline.py +81 -0
- src/designgenie/utils/__init__.py +6 -0
- src/designgenie/utils/helper.py +55 -0
- src/designgenie/utils/segmentation_utils.py +287 -0
app.py
ADDED
@@ -0,0 +1,23 @@
from src.designgenie.interfaces import GradioApp


# def run_pipeline():
#     pipe = InpaintPipeline(
#         segmentation_model_name="mask2former",
#         diffusion_model_name="controlnet_inpaint",
#         control_model_name="mlsd",
#         images_root="/home/nader/Projects/DesignGenie/assets/images/",
#         prompts_path="/home/nader/Projects/DesignGenie/assets/prompts.txt",
#         image_size=(768, 512),
#         image_extensions=(".jpg", ".jpeg", ".png", ".webp"),
#     )

#     results = pipe.run()

#     for i, images in enumerate(results):
#         for j, image in enumerate(images):
#             image.save(f"./assets/results/result_{i}_{j}.png")

if __name__ == "__main__":
    app = GradioApp()
    app.interface.launch(share=True)
examples/notebooks/demo_controlnet.ipynb
ADDED
The diff for this file is too large to render.
examples/notebooks/demo_sam.ipynb
ADDED
The diff for this file is too large to render.
requirements.txt
ADDED
@@ -0,0 +1,4 @@
gradio
torch
transformers
diffusers
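Note that the modules added in this commit also import controlnet_aux, cv2, numpy, torchvision, requests, and wandb, and the diffusion pipelines call enable_xformers_memory_efficient_attention(); those packages are not pinned here. A fuller requirements sketch (package names inferred from the imports, versions unpinned, not part of this commit) might look like:

    gradio
    torch
    torchvision
    transformers
    diffusers
    controlnet_aux
    opencv-python
    numpy
    requests
    wandb
    xformers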
src/designgenie/__init__.py
ADDED
@@ -0,0 +1 @@
from .pipelines import InpaintPipeline
src/designgenie/data/__init__.py
ADDED
@@ -0,0 +1 @@
from .image_folder import ImageFolderDataset
src/designgenie/data/image_folder.py
ADDED
@@ -0,0 +1,69 @@
from typing import Any, List, Optional, Tuple, Union
import os
from PIL import Image
from random import randint, choices

import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from diffusers.utils import load_image


class ImageFolderDataset(Dataset):
    """Dataset class for loading images and prompts from a folder and file path.

    Args:
        images_root (str):
            Path to the folder containing images.
        prompts_path (str):
            Path to the file containing prompts.
        image_size (Tuple[int, int]):
            Size of the images to be loaded.
        extensions (Tuple[str]):
            Tuple of valid image extensions.
    """

    def __init__(
        self,
        images_root: str,
        prompts_path: Optional[str] = None,
        image_size: Tuple[int, int] = (512, 512),
        extensions: Tuple[str] = (".jpg", ".jpeg", ".png", ".webp"),
    ) -> None:
        super().__init__()
        self.image_size = image_size

        self.images_paths, self.prompts = self._make_dataset(
            images_root=images_root, extensions=extensions, prompts_path=prompts_path
        )

        self.to_tensor = transforms.ToTensor()

    def _make_dataset(
        self,
        images_root: str,
        extensions: Tuple[str],
        prompts_path: Optional[str] = None,
    ) -> Tuple[List[str], Union[None, List[str]]]:
        images_paths = []
        for root, _, fnames in sorted(os.walk(images_root)):
            for fname in sorted(fnames):
                if fname.lower().endswith(extensions):
                    images_paths.append(os.path.join(root, fname))

        if prompts_path is not None:
            with open(prompts_path, "r") as f:
                prompts = f.readlines()
        else:
            prompts = None

        return images_paths, prompts

    def __len__(self) -> int:
        return len(self.images_paths)

    def __getitem__(self, idx: int) -> Tuple[Image.Image, Union[None, str]]:
        image = load_image(self.images_paths[idx]).resize(self.image_size)
        prompt = self.prompts[idx] if self.prompts is not None else None

        return self.to_tensor(image), prompt
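As a rough usage sketch (the paths below are placeholders, not part of the commit), the dataset pairs each resized image tensor with the prompt at the same line index, matching how InpaintPipeline.build_data_loader wires it up later in this commit:

    from torch.utils.data import DataLoader
    from src.designgenie.data import ImageFolderDataset

    # Folder of .jpg/.png images plus an optional prompts file with one prompt per line.
    dataset = ImageFolderDataset(
        images_root="assets/images/",
        prompts_path="assets/prompts.txt",
        image_size=(768, 512),
    )
    loader = DataLoader(dataset, batch_size=1, shuffle=False)

    for image_tensor, prompt in loader:
        # PIL resize takes (width, height), so the tensor is [1, 3, 512, 768] here.
        print(image_tensor.shape, prompt)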
src/designgenie/interfaces/__init__.py
ADDED
@@ -0,0 +1 @@
from .gradio_interface import GradioApp
src/designgenie/interfaces/gradio_interface.py
ADDED
@@ -0,0 +1,240 @@
from typing import Any, Dict, List, Optional, Tuple, Union
from dataclasses import dataclass, field
import cv2
import gradio as gr
import numpy as np
from PIL import Image
import torch

from ..models import create_diffusion_model, create_segmentation_model
from ..utils import (
    get_masked_images,
    visualize_segmentation_map,
    get_masks_from_segmentation_map,
)


# points color and marker
COLOR = (255, 0, 0)


@dataclass
class AppState:
    """A class to store the memory state of the Gradio App."""

    original_image: Image.Image = None
    predicted_semantic_map: torch.Tensor = None
    input_coordinates: List[int] = field(default_factory=list)
    n_outputs: int = 2


class GradioApp:
    def __init__(self):
        self._interface = self.build_interface()
        self.state = AppState()

        self.segmentation_model = None
        self.diffusion_model = None

    @property
    def interface(self):
        return self._interface

    def _segment_input(self, image: Image.Image, model_name: str) -> Image.Image:
        """Segment the input image using the given model."""
        if self.segmentation_model is None:
            self.segmentation_model = create_segmentation_model(
                segmentation_model_name=model_name
            )

        predicted_semantic_map = self.segmentation_model.process([image])[0]
        self.state.predicted_semantic_map = predicted_semantic_map

        segmentation_map = visualize_segmentation_map(predicted_semantic_map, image)
        return segmentation_map

    def _generate_outputs(
        self,
        prompt: str,
        model_name: str,
        n_outputs: int,
        inference_steps: int,
        strength: float,
        guidance_scale: float,
        eta: float,
    ) -> Image.Image:
        if self.diffusion_model is None:
            self.diffusion_model = create_diffusion_model(
                diffusion_model_name="controlnet_inpaint", control_model_name=model_name
            )

        control_image = self.diffusion_model.generate_control_images(
            images=[self.state.original_image]
        )[0]

        image_mask, masked_control_image = get_masked_images(
            control_image,
            self.state.predicted_semantic_map,
            self.state.input_coordinates,
        )

        outputs = self.diffusion_model.process(
            images=[self.state.original_image],
            prompts=[prompt],
            mask_images=[image_mask],
            control_images=[masked_control_image],
            negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
            n_outputs=n_outputs,
        )

        return (
            *outputs["output_images"][0],
            control_image,
            image_mask,
        )

    def image_change(self, input_image):
        input_image = input_image.resize((768, 512))
        self.state.original_image = input_image
        return input_image

    def clear_coordinates(self):
        self.state.input_coordinates = []

    def get_coordinates(self, event: gr.SelectData, input_image: Image.Image):
        w, h = tuple(event.index)
        self.state.input_coordinates.append((h, w))
        print(self.state.input_coordinates)

        return Image.fromarray(
            cv2.drawMarker(
                np.asarray(input_image), event.index, COLOR, markerSize=20, thickness=5
            )
        )

    def build_interface(self):
        """Builds the Gradio interface for the DesignGenie app."""
        with gr.Blocks() as designgenie_interface:
            # --> App Header <--
            with gr.Row():
                # --> Description <--
                with gr.Column():
                    gr.Markdown(
                        """
                        # DesignGenie

                        An AI copilot for home interior design. It identifies various sections of your home and generates personalized designs for the selected sections using ControlNet and StableDiffusion.
                        """
                    )
                # --> Model Selection <--
                with gr.Column():
                    with gr.Row():
                        segmentation_model = gr.Dropdown(
                            choices=["mask2former", "maskformer"],
                            label="Segmentation Model",
                            value="mask2former",
                            interactive=True,
                        )
                        controlnet_model = gr.Dropdown(
                            choices=["mlsd", "soft_edge", "hed", "scribble"],
                            label="Controlnet Module",
                            value="mlsd",
                            interactive=True,
                        )

            # --> Model Parameters <--
            with gr.Accordion(label="Parameters", open=False):
                with gr.Column():
                    gr.Markdown("### Stable Diffusion Parameters")
                    with gr.Row():
                        with gr.Column():
                            inference_steps = gr.Number(
                                value=30, label="Number of inference steps."
                            )
                            strength = gr.Number(value=1.0, label="Strength.")
                        with gr.Column():
                            guidance_scale = gr.Number(value=7.5, label="Guidance scale.")
                            eta = gr.Number(value=0.0, label="Eta.")

            with gr.Row().style(equal_height=False):
                with gr.Column():
                    # --> Input Image and Segmentation <--
                    input_image = gr.Image(label="Input Image", type="pil")
                    input_image.select(
                        self.get_coordinates,
                        inputs=[input_image],
                        outputs=[input_image],
                    )
                    input_image.upload(
                        self.image_change, inputs=[input_image], outputs=[input_image]
                    )

                    with gr.Row():
                        gr.Markdown(
                            """
                            1. Select your input image.
                            2. Click on the `Segment Image` button.
                            3. Choose the segments that you want to redesign by simply clicking on the image.
                            """
                        )
                        with gr.Column():
                            segment_btn = gr.Button(
                                value="Segment Image", variant="primary"
                            )
                            clear_btn = gr.Button(value="Clear")

                    segment_btn.click(
                        self._segment_input,
                        inputs=[input_image, segmentation_model],
                        outputs=input_image,
                    )
                    clear_btn.click(self.clear_coordinates)

                    # --> Prompt and Num Outputs <--
                    text = gr.Textbox(
                        label="Text prompt (optional)",
                        info="You can describe how the model should redesign the selected segments of your home.",
                    )
                    num_outputs = gr.Slider(
                        value=3,
                        minimum=1,
                        maximum=5,
                        step=1,
                        interactive=True,
                        label="Number of Generated Outputs",
                        info="Number of design outputs you want the model to generate.",
                    )

                    submit_btn = gr.Button(value="Submit", variant="primary")

                with gr.Column():
                    with gr.Tab(label="Output Images"):
                        output_images = [
                            gr.Image(
                                interactive=False, label=f"Output Image {i}", type="pil"
                            )
                            for i in range(3)
                        ]

                    with gr.Tab(label="Control Images"):
                        control_labels = ["Control Image", "Generated Mask"]
                        control_images = [
                            gr.Image(interactive=False, label=label, type="pil")
                            for label in control_labels
                        ]

                    submit_btn.click(
                        self._generate_outputs,
                        inputs=[
                            text,
                            controlnet_model,
                            num_outputs,
                            inference_steps,
                            strength,
                            guidance_scale,
                            eta,
                        ],
                        outputs=output_images + control_images,
                    )

        return designgenie_interface
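For reference, get_coordinates above relies on Gradio's image select event: the clicked pixel arrives in gr.SelectData.index. A standalone sketch of that pattern (hypothetical demo, not part of this commit) looks like:

    import gradio as gr

    clicks = []

    def on_select(evt: gr.SelectData):
        # evt.index is the (x, y) pixel that was clicked on the image.
        x, y = evt.index
        clicks.append((y, x))  # stored as (row, col), mirroring AppState.input_coordinates
        return f"Selected pixels so far: {clicks}"

    with gr.Blocks() as demo:
        img = gr.Image(type="pil", label="Click to pick segments")
        log = gr.Textbox(label="Coordinates")
        img.select(on_select, inputs=None, outputs=log)

    if __name__ == "__main__":
        demo.launch()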
src/designgenie/models/__init__.py
ADDED
@@ -0,0 +1,6 @@
from .diffusion import (
    StableDiffusionControlNet,
    StableDiffusionControlNetInpaint,
    create_diffusion_model,
)
from .segmentation import MaskFormer, Mask2Former, create_segmentation_model
src/designgenie/models/diffusion/__init__.py
ADDED
@@ -0,0 +1,15 @@
from .controlnet import StableDiffusionControlNet
from .controlnet_inpaint import StableDiffusionControlNetInpaint

DIFFUSION_MODELS = {
    "controlnet": StableDiffusionControlNet,
    "controlnet_inpaint": StableDiffusionControlNetInpaint,
}


def create_diffusion_model(diffusion_model_name: str, **kwargs):
    assert (
        diffusion_model_name in DIFFUSION_MODELS.keys()
    ), "Diffusion model name must be one of " + ", ".join(DIFFUSION_MODELS.keys())

    return DIFFUSION_MODELS[diffusion_model_name](**kwargs)
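The factory simply dispatches on the registry key and forwards keyword arguments to the selected class constructor, so a call like the following (which downloads the model weights on first use) is equivalent to instantiating StableDiffusionControlNetInpaint directly:

    from src.designgenie.models.diffusion import create_diffusion_model

    model = create_diffusion_model(
        diffusion_model_name="controlnet_inpaint",  # or "controlnet"
        control_model_name="mlsd",
    )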
src/designgenie/models/diffusion/controlnet.py
ADDED
@@ -0,0 +1,160 @@
from typing import Any, List, Optional, Tuple, Union
import itertools
from PIL import Image
import numpy as np
import torch

from controlnet_aux import MLSDdetector, PidiNetDetector, HEDdetector
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    UniPCMultistepScheduler,
)


MODEL_DICT = {
    "mlsd": {
        "name": "lllyasviel/Annotators",
        "detector": MLSDdetector,
        "model": "lllyasviel/control_v11p_sd15_mlsd",
    },
    "soft_edge": {
        "name": "lllyasviel/Annotators",
        "detector": PidiNetDetector,
        "model": "lllyasviel/control_v11p_sd15_softedge",
    },
    "hed": {
        "name": "lllyasviel/Annotators",
        "detector": HEDdetector,
        "model": "lllyasviel/sd-controlnet-hed",
    },
    "scribble": {
        "name": "lllyasviel/Annotators",
        "detector": HEDdetector,
        "model": "lllyasviel/control_v11p_sd15_scribble",
    },
}


class StableDiffusionControlNet:
    """ControlNet pipeline for generating images from prompts.

    Args:
        control_model_name (str):
            Name of the controlnet processor.
        sd_model_name (str):
            Name of the StableDiffusion model.
    """

    def __init__(
        self,
        control_model_name: str,
        sd_model_name: Optional[str] = "runwayml/stable-diffusion-v1-5",
    ) -> None:
        self.processor = MODEL_DICT[control_model_name]["detector"].from_pretrained(
            MODEL_DICT[control_model_name]["name"]
        )
        self.pipe = self.create_pipe(
            sd_model_name=sd_model_name, control_model_name=control_model_name
        )

    def _repeat(self, items: List[Any], n: int) -> List[Any]:
        """Repeat items in a list n times.

        Args:
            items (List[Any]): List of items to be repeated.
            n (int): Number of repetitions.

        Returns:
            List[Any]: List of repeated items.
        """
        return list(
            itertools.chain.from_iterable(itertools.repeat(item, n) for item in items)
        )

    def generate_control_images(self, images: List[Image.Image]) -> List[Image.Image]:
        """Generate control images from input images.

        Args:
            images (List[Image.Image]): Input images.

        Returns:
            List[Image.Image]: Control images.
        """
        return [self.processor(image) for image in images]

    def create_pipe(
        self, sd_model_name: str, control_model_name: str
    ) -> StableDiffusionControlNetPipeline:
        """Create a StableDiffusionControlNetPipeline.

        Args:
            sd_model_name (str): StableDiffusion model name.
            control_model_name (str): Name of the ControlNet module.

        Returns:
            StableDiffusionControlNetPipeline
        """
        controlnet = ControlNetModel.from_pretrained(
            MODEL_DICT[control_model_name]["model"], torch_dtype=torch.float16
        )
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            sd_model_name, controlnet=controlnet, torch_dtype=torch.float16
        )

        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.enable_model_cpu_offload()
        pipe.enable_xformers_memory_efficient_attention()

        return pipe

    def process(
        self,
        images: List[Image.Image],
        prompts: List[str],
        negative_prompt: Optional[str] = None,
        n_outputs: Optional[int] = 1,
        num_inference_steps: Optional[int] = 30,
    ) -> List[List[Image.Image]]:
        """Generate images from `prompts` using `control_images` and `negative_prompt`.

        Args:
            images (List[Image.Image]): Input images.
            prompts (List[str]): List of prompts.
            negative_prompt (Optional[str], optional): Negative prompt. Defaults to None.
            n_outputs (Optional[int], optional): Number of generated outputs. Defaults to 1.
            num_inference_steps (Optional[int], optional): Number of inference iterations. Defaults to 30.

        Returns:
            List[List[Image.Image]]
        """

        control_images = self.generate_control_images(images)

        assert len(prompts) == len(
            control_images
        ), "Number of prompts and input images must be equal."

        if n_outputs > 1:
            prompts = self._repeat(prompts, n=n_outputs)
            control_images = self._repeat(control_images, n=n_outputs)

        generator = [
            torch.Generator(device="cuda").manual_seed(int(i))
            for i in np.random.randint(len(prompts), size=len(prompts))
        ]

        output = self.pipe(
            prompts,
            image=control_images,
            negative_prompt=[negative_prompt] * len(prompts),
            num_inference_steps=num_inference_steps,
            generator=generator,
        )

        output_images = [
            output.images[idx * n_outputs : (idx + 1) * n_outputs]
            for idx in range(len(images))
        ]

        return output_images
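A rough end-to-end sketch of this class (file path and prompt are placeholders; a CUDA device is assumed, since the generators above are created with device="cuda"):

    from diffusers.utils import load_image
    from src.designgenie.models.diffusion import StableDiffusionControlNet

    model = StableDiffusionControlNet(control_model_name="mlsd")
    room = load_image("assets/images/room.jpg")  # placeholder path

    # Returns one list of output images per input image, with n_outputs images per list.
    outputs = model.process(
        images=[room],
        prompts=["a scandinavian living room, bright, minimal"],
        negative_prompt="lowres, worst quality",
        n_outputs=2,
    )
    outputs[0][0].save("result.png")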
src/designgenie/models/diffusion/controlnet_inpaint.py
ADDED
@@ -0,0 +1,125 @@
from typing import List, Optional, Tuple, Union
from PIL import Image
import numpy as np
import torch

from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetInpaintPipeline,
    UniPCMultistepScheduler,
)

from .controlnet import StableDiffusionControlNet, MODEL_DICT


class StableDiffusionControlNetInpaint(StableDiffusionControlNet):
    """StableDiffusion with ControlNet model for inpainting images based on prompts.

    Args:
        control_model_name (str):
            Name of the controlnet processor.
        sd_model_name (str):
            Name of the StableDiffusion model.
    """

    def __init__(
        self,
        control_model_name: str,
        sd_model_name: Optional[str] = "runwayml/stable-diffusion-inpainting",
    ) -> None:
        super().__init__(
            control_model_name=control_model_name,
            sd_model_name=sd_model_name,
        )

    def create_pipe(
        self, sd_model_name: str, control_model_name: str
    ) -> StableDiffusionControlNetInpaintPipeline:
        """Create a StableDiffusionControlNetInpaintPipeline.

        Args:
            sd_model_name (str): StableDiffusion model name.
            control_model_name (str): Name of the ControlNet module.

        Returns:
            StableDiffusionControlNetInpaintPipeline
        """
        controlnet = ControlNetModel.from_pretrained(
            MODEL_DICT[control_model_name]["model"], torch_dtype=torch.float16
        )
        pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
            sd_model_name, controlnet=controlnet, torch_dtype=torch.float16
        )

        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.enable_model_cpu_offload()
        pipe.enable_xformers_memory_efficient_attention()

        return pipe

    def process(
        self,
        images: List[Image.Image],
        prompts: List[str],
        mask_images: List[Image.Image],
        control_images: Optional[List[Image.Image]] = None,
        negative_prompt: Optional[str] = None,
        n_outputs: Optional[int] = 1,
        num_inference_steps: Optional[int] = 30,
        strength: Optional[float] = 1.0,
        guidance_scale: Optional[float] = 7.5,
        eta: Optional[float] = 0.0,
    ) -> List[List[Image.Image]]:
        """Inpaint images based on `prompts` using `control_images` and `mask_images`.

        Args:
            images (List[Image.Image]): Input images.
            prompts (List[str]): List of prompts.
            mask_images (List[Image.Image]): List of mask images.
            control_images (Optional[List[Image.Image]], optional): List of control images. Defaults to None.
            negative_prompt (Optional[str], optional): Negative prompt. Defaults to None.
            n_outputs (Optional[int], optional): Number of generated outputs. Defaults to 1.
            num_inference_steps (Optional[int], optional): Number of inference iterations. Defaults to 30.

        Returns:
            List[List[Image.Image]]
        """

        if control_images is None:
            control_images = self.generate_control_images(images)

        assert len(prompts) == len(
            control_images
        ), "Number of prompts and input images must be equal."

        if n_outputs > 1:
            prompts = self._repeat(prompts, n=n_outputs)
            images = self._repeat(images, n=n_outputs)
            control_images = self._repeat(control_images, n=n_outputs)
            mask_images = self._repeat(mask_images, n=n_outputs)

        generator = [
            torch.Generator(device="cuda").manual_seed(int(i))
            for i in np.random.randint(max(len(prompts), 16), size=len(prompts))
        ]

        output = self.pipe(
            prompts,
            image=images,
            control_image=control_images,
            mask_image=mask_images,
            negative_prompt=[negative_prompt] * len(prompts),
            num_inference_steps=num_inference_steps,
            generator=generator,
        )

        output_images = [
            output.images[idx * n_outputs : (idx + 1) * n_outputs]
            for idx in range(len(images) // n_outputs)
        ]

        return {
            "output_images": output_images,
            "control_images": control_images,
            "mask_images": mask_images,
        }
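An inpainting usage sketch (placeholder paths; CUDA assumed as in the parent class). Note that, unlike the parent class, process() here returns a dict:

    from diffusers.utils import load_image
    from src.designgenie.models.diffusion import StableDiffusionControlNetInpaint

    model = StableDiffusionControlNetInpaint(control_model_name="mlsd")
    room = load_image("assets/images/room.jpg")
    mask = load_image("assets/masks/wall_mask.png")  # white = repaint, black = keep

    result = model.process(
        images=[room],
        prompts=["exposed brick wall"],
        mask_images=[mask],
        n_outputs=2,
    )
    result["output_images"][0][0].save("inpainted.png")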
src/designgenie/models/segmentation/__init__.py
ADDED
@@ -0,0 +1,16 @@
from .maskformer import MaskFormer, Mask2Former

SEGMENTATION_MODEL_DICT = {
    "maskformer": MaskFormer,
    "mask2former": Mask2Former,
}


def create_segmentation_model(segmentation_model_name: str, **kwargs):
    assert (
        segmentation_model_name in SEGMENTATION_MODEL_DICT.keys()
    ), "Segmentation model name must be one of " + ", ".join(
        SEGMENTATION_MODEL_DICT.keys()
    )

    return SEGMENTATION_MODEL_DICT[segmentation_model_name](**kwargs)
src/designgenie/models/segmentation/maskformer.py
ADDED
@@ -0,0 +1,122 @@
from typing import Any, List, Optional, Tuple, Union
from PIL import Image
import numpy as np
import torch
import torchvision.transforms as transforms
from transformers import (
    AutoImageProcessor,
    Mask2FormerForUniversalSegmentation,
    MaskFormerImageProcessor,
    MaskFormerForInstanceSegmentation,
)


class MaskFormer:
    """MaskFormer semantic segmentation model.

    Args:
        model_size (str, optional):
            Size of the MaskFormer model. Defaults to "large".
    """

    def __init__(self, model_size: Optional[str] = "large") -> None:
        assert model_size in [
            "tiny",
            "base",
            "large",
        ], "Model size must be one of 'tiny', 'base', or 'large'"

        self.processor = MaskFormerImageProcessor.from_pretrained(
            f"facebook/maskformer-swin-{model_size}-ade"
        )
        self.model = MaskFormerForInstanceSegmentation.from_pretrained(
            f"facebook/maskformer-swin-{model_size}-ade"
        )

    def process(self, images: List[Image.Image]):
        inputs = self.processor(images=images, return_tensors="pt")
        outputs = self.model(**inputs)
        # model predicts class_queries_logits of shape `(batch_size, num_queries)`
        # and masks_queries_logits of shape `(batch_size, num_queries, height, width)`
        class_queries_logits = outputs.class_queries_logits
        masks_queries_logits = outputs.masks_queries_logits

        # you can pass them to processor for postprocessing
        # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs)
        predicted_semantic_maps = self.processor.post_process_semantic_segmentation(
            outputs, target_sizes=[images[0].size[::-1]] * len(images)
        )

        return predicted_semantic_maps


class Mask2Former(MaskFormer):
    """Mask2Former semantic segmentation model.

    Args:
        model_size (str, optional):
            Size of the Mask2Former model. Defaults to "large".
    """

    def __init__(self, model_size: Optional[str] = "large") -> None:
        assert model_size in [
            "tiny",
            "base",
            "large",
        ], "Model size must be one of 'tiny', 'base', or 'large'"
        self.processor = AutoImageProcessor.from_pretrained(
            f"facebook/mask2former-swin-{model_size}-ade-semantic"
        )
        self.model = Mask2FormerForUniversalSegmentation.from_pretrained(
            f"facebook/mask2former-swin-{model_size}-ade-semantic"
        )


# class ADESegmentation:
#     def __init__(self, model_name: str):
#         self.processor = MODEL_DICT[model_name]["processor"].from_pretrained(
#             MODEL_DICT[model_name]["name"]
#         )
#         self.model = MODEL_DICT[model_name]["model"].from_pretrained(
#             MODEL_DICT[model_name]["name"]
#         )

#     def predict(self, image: Image.Image):
#         inputs = processor(images=image, return_tensors="pt")
#         outputs = model(**inputs)
#         # model predicts class_queries_logits of shape `(batch_size, num_queries)`
#         # and masks_queries_logits of shape `(batch_size, num_queries, height, width)`
#         class_queries_logits = outputs.class_queries_logits
#         masks_queries_logits = outputs.masks_queries_logits

#         # you can pass them to processor for postprocessing
#         # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs)
#         predicted_semantic_maps = processor.post_process_semantic_segmentation(
#             outputs, target_sizes=[image.size[::-1]]
#         )

#         return predicted_semantic_maps

#     def get_mask(self, predicted_semantic_maps, class_id: int):
#         masks, labels, obj_names = get_masks_from_segmentation_map(
#             predicted_semantic_maps[0]
#         )

#         mask = masks[labels.index(ID)]
#         object_mask = np.logical_not(mask).astype(int)

#         mask = torch.Tensor(mask).repeat(3, 1, 1)
#         object_mask = torch.Tensor(object_mask).repeat(3, 1, 1)

#         return mask, object_mask

#     def get_PIL_mask(self, predicted_semantic_maps, class_id: int):
#         mask, object_mask = self.get_mask(predicted_semantic_maps[0], class_id=class_id)

#         mask = transforms.ToPILImage()(mask)
#         object_mask = transforms.ToPILImage()(object_mask)

#         return mask, object_mask

#     def get_PIL_segmentation_map(self, predicted_semantic_maps):
#         return visualize_segmentation_map(predicted_semantic_maps[0])
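A usage sketch (placeholder path; weights download on first run): build the segmenter through the factory from the package __init__ above and get one ADE20K semantic map per image:

    from diffusers.utils import load_image
    from src.designgenie.models import create_segmentation_model

    segmenter = create_segmentation_model(
        segmentation_model_name="mask2former", model_size="large"
    )
    room = load_image("assets/images/room.jpg")  # placeholder path

    semantic_maps = segmenter.process([room])
    # Each entry is a (height, width) tensor of ADE20K class ids.
    print(semantic_maps[0].shape)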
src/designgenie/pipelines/__init__.py
ADDED
@@ -0,0 +1 @@
from .inpaint_pipeline import InpaintPipeline
src/designgenie/pipelines/inpaint_pipeline.py
ADDED
@@ -0,0 +1,81 @@
from typing import Any, Dict, List, Optional, Tuple, Union
import torch
from torch.utils.data import DataLoader
from torchvision.transforms.functional import to_pil_image

from ..data import ImageFolderDataset
from ..models import create_diffusion_model, create_segmentation_model
from ..utils import get_masked_images


class InpaintPipeline:
    def __init__(
        self,
        segmentation_model_name: str,
        diffusion_model_name: str,
        control_model_name: str,
        images_root: str,
        prompts_path: Optional[str] = None,
        sd_model_name: Optional[str] = "runwayml/stable-diffusion-inpainting",
        image_size: Optional[Tuple[int, int]] = (512, 512),
        image_extensions: Optional[Tuple[str]] = (".jpg", ".jpeg", ".png", ".webp"),
        segmentation_model_size: Optional[str] = "large",
    ):
        self.segmentation_model = create_segmentation_model(
            segmentation_model_name=segmentation_model_name,
            model_size=segmentation_model_size,
        )

        self.diffusion_model = create_diffusion_model(
            diffusion_model_name=diffusion_model_name,
            control_model_name=control_model_name,
            sd_model_name=sd_model_name,
        )

        self.data_loader = self.build_data_loader(
            images_root=images_root,
            prompts_path=prompts_path,
            image_size=image_size,
            image_extensions=image_extensions,
        )

    def build_data_loader(
        self,
        images_root: str,
        prompts_path: Optional[str] = None,
        image_size: Optional[Tuple[int, int]] = (512, 512),
        image_extensions: Optional[Tuple[str]] = (".jpg", ".jpeg", ".png", ".webp"),
        batch_size: Optional[int] = 1,
    ) -> DataLoader:
        dataset = ImageFolderDataset(
            images_root, prompts_path, image_size, image_extensions
        )
        data_loader = DataLoader(
            dataset, batch_size=batch_size, shuffle=False, num_workers=8
        )

        return data_loader

    def run(self, data_loader: Optional[DataLoader] = None) -> List[Dict[str, Any]]:
        if data_loader is not None:
            self.data_loader = data_loader

        results = []
        for idx, (images, prompts) in enumerate(self.data_loader):
            images = [to_pil_image(img) for img in images]

            semantic_maps = self.segmentation_model.process(images)

            object_masks = [
                get_object_mask(seg_map, class_id=0) for seg_map in semantic_maps
            ]

            outputs = self.diffusion_model.process(
                images=images,
                prompts=[prompts[0]],
                mask_images=object_masks,
                negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
            )
            results += outputs["output_images"]

        return results
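For context, the commented-out run_pipeline() block in app.py above exercises this class; a trimmed sketch of the same flow (placeholder paths, not part of this commit) is:

    from src.designgenie.pipelines import InpaintPipeline

    pipe = InpaintPipeline(
        segmentation_model_name="mask2former",
        diffusion_model_name="controlnet_inpaint",
        control_model_name="mlsd",
        images_root="assets/images/",
        prompts_path="assets/prompts.txt",
        image_size=(768, 512),
    )
    # run() returns a list of lists of PIL images, one inner list per input image.
    results = pipe.run()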
src/designgenie/utils/__init__.py
ADDED
@@ -0,0 +1,6 @@
from .segmentation_utils import (
    get_masked_images,
    visualize_segmentation_map,
    get_masks_from_segmentation_map,
)
from .helper import WandBLogger, parser
src/designgenie/utils/helper.py
ADDED
@@ -0,0 +1,55 @@
from typing import List, Tuple, Dict, Union, Any, Optional
import argparse
from PIL import Image
import wandb


class WandBLogger:
    def __init__(self, config: Dict[str, Any]):
        assert "wandb_project" in config, "Missing `wandb_project` in config"
        self.wandb = wandb.init(
            project=config.wandb_project, name=config.exp_name, config=config
        )

    def log_scalars(self, logs: Dict[str, Union[int, float]]):
        self.wandb.log(logs)

    def log_images(self, logs: Dict[str, List[Image.Image]]):
        wandb.log(
            {
                key: [wandb.Image(image, caption=key) for image in images]
                for key, images in logs.items()
            }
        )


def parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--segmentation_model", type=str, default="mask2former")
    parser.add_argument("--controlnet_name", type=str, default="hed")
    parser.add_argument(
        "--sd_model", type=str, default="runwayml/stable-diffusion-v1-5"
    )
    parser.add_argument(
        "--images_path",
        type=str,
        default="/home/nader/DesignGenie/assets/images/",
    )
    parser.add_argument(
        "--prompts_path",
        type=str,
        default="/home/nader/DesignGenie/assets/prompts.txt",
    )
    parser.add_argument(
        "--negative_prompt",
        type=str,
        default="monochrome, lowres, bad anatomy, worst quality, low quality",
    )
    parser.add_argument("--num_inference_steps", type=int, default=20)
    parser.add_argument("--n_outputs", type=int, default=4)
    parser.add_argument("--wandb_project", type=str, default="DesignGenie")
    parser.add_argument("--wandb", type=int, default=1)
    parser.add_argument("--exp_name", type=str, default="demo")
    args = parser.parse_args()

    return args
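Since WandBLogger reads config.wandb_project and config.exp_name as attributes, the argparse Namespace returned by parser() is the natural config object; a minimal sketch (assumes a logged-in wandb account):

    from src.designgenie.utils import WandBLogger, parser

    args = parser()                    # parses the CLI flags defined above
    logger = WandBLogger(config=args)  # requires wandb credentials
    logger.log_scalars({"step": 0, "guidance_scale": 7.5})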
src/designgenie/utils/segmentation_utils.py
ADDED
@@ -0,0 +1,287 @@
from typing import Any, List, Optional, Tuple, Union
from functools import reduce
import numpy as np
from PIL import Image
import requests
import torch
from torch import nn
import torchvision.transforms as transforms
from torchvision.transforms.functional import to_pil_image


def visualize_segmentation_map(
    semantic_map: torch.Tensor, original_image: Image.Image
) -> Image.Image:
    """
    Visualizes a segmentation map by overlaying it on the original image.

    Args:
        semantic_map (torch.Tensor): Segmentation map tensor.
        original_image (Image.Image): Original image.

    Returns:
        Image.Image: Overlay image with segmentation map.
    """
    # Convert to RGB
    color_seg = np.zeros(
        (semantic_map.shape[0], semantic_map.shape[1], 3), dtype=np.uint8
    )  # height, width, 3
    palette = np.array(ade_palette())
    for label, color in enumerate(palette):
        color_seg[semantic_map == label, :] = color
    # Convert to BGR
    color_seg = color_seg[..., ::-1]

    # Show image + mask
    img = np.array(original_image) * 0.5 + color_seg * 0.5
    img = img.astype(np.uint8)

    return Image.fromarray(img)


def get_masks_from_segmentation_map(
    semantic_map: torch.Tensor,
) -> Tuple[List[np.array], List[int], List[str]]:
    """
    Extracts masks, labels, and object names from a segmentation map.

    Args:
        semantic_map (torch.Tensor): Segmentation map tensor.

    Returns:
        Tuple[List[np.array], List[int], List[str]]: Tuple containing masks, labels, and object names.
    """
    masks = []
    labels = []
    obj_names = []
    for label, color in enumerate(np.array(ade_palette())):
        mask = np.ones(
            (semantic_map.shape[0], semantic_map.shape[1]), dtype=np.uint8
        )  # height, width
        indices = semantic_map == label
        mask[indices] = 0

        if indices.sum() > 0:
            masks.append(mask)
            labels.append(label)
            obj_names.append(ADE_LABELS[str(label)])

    return masks, labels, obj_names


def get_mask_from_coordinates(
    segmentation_maps: List[np.array], coordinates: Tuple[int, int]
):
    """
    Retrieves a mask from a list of segmentation maps based on given coordinates.

    Args:
        segmentation_maps (List[np.array]): List of segmentation maps.
        coordinates (Tuple[int, int]): Coordinates to filter the masks.

    Returns:
        np.array: Combined mask from the segmentation maps.
    """
    masks = []
    for seg_map in segmentation_maps:
        for coordinate in coordinates:
            if seg_map[coordinate] == 0:
                masks.append(seg_map)

    return reduce(np.multiply, masks)


def get_masked_images(
    control_image: Image.Image,
    semantic_map: torch.Tensor,
    coordinates: List[Tuple[int, int]],
    return_tensors: bool = False,
) -> Union[torch.Tensor, Image.Image]:
    """
    Retrieves masked images based on given control image, segmentation map, and coordinates.

    Args:
        control_image (Image.Image): Control image.
        semantic_map (torch.Tensor): Segmentation map tensor.
        coordinates (List[Tuple[int, int]]): List of coordinates.
        return_tensors (bool, optional): Whether to return masked images as tensors. Defaults to False.

    Returns:
        Union[torch.Tensor, Image.Image]: Masked image tensor or PIL image.
    """
    masks, labels, obj_names = get_masks_from_segmentation_map(semantic_map)

    mask = get_mask_from_coordinates(masks, coordinates)

    mask_image = np.logical_not(mask).astype(int)
    mask_image = torch.Tensor(mask_image).repeat(3, 1, 1)

    mask = torch.Tensor(mask).repeat(3, 1, 1)
    control_image = transforms.ToTensor()(control_image)
    masked_control_image = transforms.ToPILImage()(mask * control_image)

    if not return_tensors:
        mask_image = to_pil_image(mask_image)

    return mask_image, masked_control_image


ADE_LABELS = requests.get(
    "https://huggingface.co/datasets/huggingface/label-files/raw/main/ade20k-id2label.json"
).json()


def ade_palette():
    """ADE20K palette that maps each class to RGB values."""
    return [
        [120, 120, 120],
        [180, 120, 120],
        [6, 230, 230],
        [80, 50, 50],
        [4, 200, 3],
        [120, 120, 80],
        [140, 140, 140],
        [204, 5, 255],
        [230, 230, 230],
        [4, 250, 7],
        [224, 5, 255],
        [235, 255, 7],
        [150, 5, 61],
        [120, 120, 70],
        [8, 255, 51],
        [255, 6, 82],
        [143, 255, 140],
        [204, 255, 4],
        [255, 51, 7],
        [204, 70, 3],
        [0, 102, 200],
        [61, 230, 250],
        [255, 6, 51],
        [11, 102, 255],
        [255, 7, 71],
        [255, 9, 224],
        [9, 7, 230],
        [220, 220, 220],
        [255, 9, 92],
        [112, 9, 255],
        [8, 255, 214],
        [7, 255, 224],
        [255, 184, 6],
        [10, 255, 71],
        [255, 41, 10],
        [7, 255, 255],
        [224, 255, 8],
        [102, 8, 255],
        [255, 61, 6],
        [255, 194, 7],
        [255, 122, 8],
        [0, 255, 20],
        [255, 8, 41],
        [255, 5, 153],
        [6, 51, 255],
        [235, 12, 255],
        [160, 150, 20],
        [0, 163, 255],
        [140, 140, 140],
        [250, 10, 15],
        [20, 255, 0],
        [31, 255, 0],
        [255, 31, 0],
        [255, 224, 0],
        [153, 255, 0],
        [0, 0, 255],
        [255, 71, 0],
        [0, 235, 255],
        [0, 173, 255],
        [31, 0, 255],
        [11, 200, 200],
        [255, 82, 0],
        [0, 255, 245],
        [0, 61, 255],
        [0, 255, 112],
        [0, 255, 133],
        [255, 0, 0],
        [255, 163, 0],
        [255, 102, 0],
        [194, 255, 0],
        [0, 143, 255],
        [51, 255, 0],
        [0, 82, 255],
        [0, 255, 41],
        [0, 255, 173],
        [10, 0, 255],
        [173, 255, 0],
        [0, 255, 153],
        [255, 92, 0],
        [255, 0, 255],
        [255, 0, 245],
        [255, 0, 102],
        [255, 173, 0],
        [255, 0, 20],
        [255, 184, 184],
        [0, 31, 255],
        [0, 255, 61],
        [0, 71, 255],
        [255, 0, 204],
        [0, 255, 194],
        [0, 255, 82],
        [0, 10, 255],
        [0, 112, 255],
        [51, 0, 255],
        [0, 194, 255],
        [0, 122, 255],
        [0, 255, 163],
        [255, 153, 0],
        [0, 255, 10],
        [255, 112, 0],
        [143, 255, 0],
        [82, 0, 255],
        [163, 255, 0],
        [255, 235, 0],
        [8, 184, 170],
        [133, 0, 255],
        [0, 255, 92],
        [184, 0, 255],
        [255, 0, 31],
        [0, 184, 255],
        [0, 214, 255],
        [255, 0, 112],
        [92, 255, 0],
        [0, 224, 255],
        [112, 224, 255],
        [70, 184, 160],
        [163, 0, 255],
        [153, 0, 255],
        [71, 255, 0],
        [255, 0, 163],
        [255, 204, 0],
        [255, 0, 143],
        [0, 255, 235],
        [133, 255, 0],
        [255, 0, 235],
        [245, 0, 255],
        [255, 0, 122],
        [255, 245, 0],
        [10, 190, 212],
        [214, 255, 0],
        [0, 204, 255],
        [20, 0, 255],
        [255, 255, 0],
        [0, 153, 255],
        [0, 41, 255],
        [0, 255, 204],
        [41, 0, 255],
        [41, 255, 0],
        [173, 0, 255],
        [0, 245, 255],
        [71, 0, 255],
        [122, 0, 255],
        [0, 255, 184],
        [0, 92, 255],
        [184, 255, 0],
        [0, 133, 255],
        [255, 214, 0],
        [25, 194, 194],
        [102, 255, 0],
        [92, 0, 255],
    ]
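A sketch tying these utilities together (placeholder path; the clicked coordinate is hypothetical and is given as (row, col), matching how GradioApp.get_coordinates stores clicks):

    from diffusers.utils import load_image
    from src.designgenie.models import create_segmentation_model
    from src.designgenie.utils import (
        get_masked_images,
        get_masks_from_segmentation_map,
        visualize_segmentation_map,
    )

    image = load_image("assets/images/room.jpg").resize((768, 512))
    segmenter = create_segmentation_model(segmentation_model_name="mask2former")
    semantic_map = segmenter.process([image])[0]

    overlay = visualize_segmentation_map(semantic_map, image)  # color overlay for display
    masks, labels, names = get_masks_from_segmentation_map(semantic_map)
    print(names)  # ADE20K class names present in the image

    # Build the inpainting mask for the region under a clicked pixel; the raw image
    # stands in for a ControlNet control image here.
    mask_image, masked_control = get_masked_images(
        control_image=image, semantic_map=semantic_map, coordinates=[(100, 200)]
    )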