outpaint-video-zoom

Running on Zero

File size: 9,402 Bytes

edb0494
6405936
 
 
 
 
 
edb0494
6405936
 
edb0494
a7d8817
d49f90c
a7d8817
6405936
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ab7724
c37eeb3
e5a75ef
 
8263fc6
e5a75ef
 
 
 
 
 
 
 
 
 
 
 
c37eeb3
 
 
 
 
 
 
 
 
 
 
 
e5a75ef
 
c37eeb3
e5a75ef
 
c37eeb3
e5a75ef
 
 
 
 
 
c37eeb3
e5a75ef
 
6405936
4ab7724
e5a75ef
4ab7724
 
 
 
 
 
 
 
 
6405936
 
 
 
 
 
4ab7724
6405936
4ab7724
6405936
 
a7d8817
6405936
a7d8817
15a8627
5bc9409
 
e5a75ef
 
 
5bc9409
e5a75ef
 
 
5bc9409
8263fc6
6405936
 
 
 
e5a75ef
97567b1
 
40a5fd5
97567b1
 
 
e5a75ef
9cdaf5d
 
4a91cdc
 
 
 
 
 
 
6405936
 
97567b1
976671e
 
 
 
 
 
 
 
 
0ef5dee
976671e
40a5fd5
 
 
4ab7724
40a5fd5
 
8263fc6
0ef5dee
 
40a5fd5
 
0ef5dee
4ab7724
5bc9409
40a5fd5
5bc9409
 
 
 
 
 
c37eeb3
5bc9409
 
 
 
 
 
c37eeb3
5bc9409
 
 
 
 
 
 
c37eeb3
5bc9409
 
 
 
 
 
 
 
8263fc6
e5a75ef
 
 
 
 
c837d9c
 
4ab7724
 
 
 
c837d9c
4ab7724
c837d9c
4ab7724
976671e
 
 
 
 
97567b1
5bc9409
 
 
8263fc6
5bc9409
 
6405936
 
 
 
 
976671e
e5a75ef
6405936
 
 
4ab7724
 
 
 
 
 
e5a75ef
4ab7724
 
6405936
ad4dbe6

import gradio as gr
import spaces
import torch
from diffusers import AutoencoderKL, TCDScheduler
from diffusers.models.model_loading_utils import load_state_dict
from gradio_imageslider import ImageSlider
from huggingface_hub import hf_hub_download

from controlnet_union import ControlNetModel_Union
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline

from PIL import Image, ImageDraw
import numpy as np

MODELS = {
    "RealVisXL V5.0 Lightning": "SG161222/RealVisXL_V5.0_Lightning",
}

config_file = hf_hub_download(
    "xinsir/controlnet-union-sdxl-1.0",
    filename="config_promax.json",
)

config = ControlNetModel_Union.load_config(config_file)
controlnet_model = ControlNetModel_Union.from_config(config)
model_file = hf_hub_download(
    "xinsir/controlnet-union-sdxl-1.0",
    filename="diffusion_pytorch_model_promax.safetensors",
)
state_dict = load_state_dict(model_file)
model, _, _, _, _ = ControlNetModel_Union._load_pretrained_model(
    controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0"
)
model.to(device="cuda", dtype=torch.float16)

vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
).to("cuda")

pipe = StableDiffusionXLFillPipeline.from_pretrained(
    "SG161222/RealVisXL_V5.0_Lightning",
    torch_dtype=torch.float16,
    vae=vae,
    controlnet=model,
    variant="fp16",
).to("cuda")

pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)


@spaces.GPU
def infer(image, model_selection, width, height, overlap_width, num_inference_steps, resize_size, prompt_input=None):
    source = image
    target_size = (width, height)
    target_ratio = (width, height)
    overlap = overlap_width

    # New resizing logic based on resize_size
    if resize_size != -1:
        # Calculate new height to maintain aspect ratio
        aspect_ratio = source.height / source.width
        new_height = int(resize_size * aspect_ratio)
        source = source.resize((resize_size, new_height), Image.LANCZOS)

    # Existing resizing logic (now only applied if resize_size is -1)
    if resize_size == -1:
        if source.width < target_size[0] and source.height < target_size[1]:
            scale_factor = min(target_size[0] / source.width, target_size[1] / source.height)
            new_width = int(source.width * scale_factor)
            new_height = int(source.height * scale_factor)
            source = source.resize((new_width, new_height), Image.LANCZOS)

        if source.width > target_size[0] or source.height > target_size[1]:
            scale_factor = min(target_size[0] / source.width, target_size[1] / source.height)
            new_width = int(source.width * scale_factor)
            new_height = int(source.height * scale_factor)
            source = source.resize((new_width, new_height), Image.LANCZOS)

    margin_x = (target_size[0] - source.width) // 2
    margin_y = (target_size[1] - source.height) // 2

    background = Image.new('RGB', target_size, (255, 255, 255))
    background.paste(source, (margin_x, margin_y))

    mask = Image.new('L', target_size, 255)
    mask_draw = ImageDraw.Draw(mask)
    mask_draw.rectangle([
        (margin_x + overlap, margin_y + overlap),
        (margin_x + source.width - overlap, margin_y + source.height - overlap)
    ], fill=0)

    cnet_image = background.copy()
    cnet_image.paste(0, (0, 0), mask)

    final_prompt = "high quality"
    if prompt_input.strip() != "":
        final_prompt += ", " + prompt_input

    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(final_prompt, "cuda", True)

    for image in pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
        image=cnet_image,
        num_inference_steps=num_inference_steps
    ):
        yield cnet_image, image

    image = image.convert("RGBA")
    cnet_image.paste(image, (0, 0), mask)

    yield background, cnet_image

def preload_presets(target_ratio):
    if target_ratio == "9:16":
        changed_width = 720
        changed_height = 1280
        return changed_width, changed_height, gr.update(open=False)
    elif target_ratio == "16:9":
        changed_width = 1280
        changed_height = 720
        return changed_width, changed_height, gr.update(open=False)
    elif target_ratio == "Custom":
        return 720, 1280, gr.update(open=True)

def clear_result():
    return gr.update(value=None)


css = """
.gradio-container {
    width: 1200px !important;
}
"""


title = """<h1 align="center">Diffusers Image Outpaint</h1>
<div align="center">Drop an image you would like to extend, pick your expected ratio and hit Generate.</div>
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
    <p style="display: flex;gap: 6px;">
         <a href="https://huggingface.co/spaces/fffiloni/diffusers-image-outpout?duplicate=true">
            <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md.svg" alt="Duplicate this Space">
        </a> to skip the queue and enjoy faster inference on the GPU of your choice 
    </p>
</div>
"""

with gr.Blocks(css=css) as demo:
    with gr.Column():
        gr.HTML(title)

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(
                    type="pil",
                    label="Input Image",
                    sources=["upload"],
                    height = 300
                )
                
                prompt_input = gr.Textbox(label="Prompt (Optional)")
                
                with gr.Row():
                    target_ratio = gr.Radio(
                        label = "Expected Ratio",
                        choices = ["9:16", "16:9", "Custom"],
                        value = "9:16",
                        scale = 2
                    )
                    
                    run_button = gr.Button("Generate", scale=1)

                with gr.Accordion(label="Advanced settings", open=False) as settings_panel:
                    with gr.Column(): 
                        with gr.Row():
                            width_slider = gr.Slider(
                                label="Width",
                                minimum=720,
                                maximum=1440,
                                step=8,
                                value=720,
                            )
                            height_slider = gr.Slider(
                                label="Height",
                                minimum=720,
                                maximum=1440,
                                step=8,
                                value=1280,
                            )
                        with gr.Row():
                            model_selection = gr.Dropdown(
                                choices=list(MODELS.keys()),
                                value="RealVisXL V5.0 Lightning",
                                label="Model",
                            )
                            num_inference_steps = gr.Slider(label="Steps", minimum=4, maximum=12, step=1, value=8)

                        overlap_width = gr.Slider(
                            label="Mask overlap width",
                            minimum=1,
                            maximum=50,
                            value=42,
                            step=1
                        )
                        
                        resize_size = gr.Number(
                            label="Resize Size (-1 for default behavior)",
                            value=-1,
                            precision=0
                        )

                gr.Examples(
                    examples=[
                        ["./examples/example_1.webp", "RealVisXL V5.0 Lightning", 1280, 720],  
                        ["./examples/example_2.jpg", "RealVisXL V5.0 Lightning", 720, 1280],  
                        ["./examples/example_3.jpg", "RealVisXL V5.0 Lightning", 1024, 1024],  
                    ],
                    inputs=[input_image, model_selection, width_slider, height_slider],
                )

            with gr.Column():
                result = ImageSlider(
                    interactive=False,
                    label="Generated Image",
                )

    target_ratio.change(
        fn = preload_presets,
        inputs = [target_ratio],
        outputs = [width_slider, height_slider, settings_panel],
        queue = False
    )
    run_button.click(
        fn=clear_result,
        inputs=None,
        outputs=result,
    ).then(
        fn=infer,
        inputs=[input_image, model_selection, width_slider, height_slider, overlap_width, num_inference_steps, resize_size, prompt_input],
        outputs=result,
    )

    prompt_input.submit(
        fn=clear_result,
        inputs=None,
        outputs=result,
    ).then(
        fn=infer,
        inputs=[input_image, model_selection, width_slider, height_slider, overlap_width, num_inference_steps, resize_size, prompt_input],
        outputs=result,
    )

demo.queue(max_size=12).launch(share=False, show_error=True, show_api=False)