"""Gradio demo: FLUX.1-dev image generation driven by two ControlNet conditions.

Loads the InstantX Union ControlNet for FLUX.1-dev and exposes a simple UI
where the user supplies a prompt plus two control images (depth and canny by
default) with selectable control modes.
"""

import torch
from diffusers.utils import load_image
from diffusers import FluxControlNetPipeline, FluxControlNetModel, FluxMultiControlNetModel
import gradio as gr
import spaces

base_model = 'black-forest-labs/FLUX.1-dev'
controlnet_model_union = 'InstantX/FLUX.1-dev-Controlnet-Union'

controlnet_union = FluxControlNetModel.from_pretrained(
    controlnet_model_union, torch_dtype=torch.bfloat16
)
# we always recommend loading via FluxMultiControlNetModel
controlnet = FluxMultiControlNetModel([controlnet_union])

pipe = FluxControlNetPipeline.from_pretrained(
    base_model, controlnet=controlnet, torch_dtype=torch.bfloat16
)
pipe.to("cuda")

# Control-mode names in the order the Union ControlNet expects; the pipeline
# takes the integer position of each mode, not the name.
control_modes = [
    "canny",
    "tile",
    "depth",
    "blur",
    "pose",
    "gray",
    "lq",
]


@spaces.GPU
def generate_image(prompt, control_image_depth, control_mode_depth_index,
                   control_image_canny, control_mode_canny_index):
    """Generate one image conditioned on two control images.

    Args:
        prompt: Text prompt for the diffusion model.
        control_image_depth: PIL image used as the first control condition;
            its size also fixes the output width/height.
        control_mode_depth_index: Mode *name* chosen in the first dropdown
            (despite the parameter name, this is a string such as "depth").
        control_image_canny: PIL image used as the second control condition.
        control_mode_canny_index: Mode name chosen in the second dropdown.

    Returns:
        The generated PIL image.
    """
    # Map the dropdown's mode string to the integer index the pipeline expects.
    control_mode_depth = control_modes.index(control_mode_depth_index)
    control_mode_canny = control_modes.index(control_mode_canny_index)

    # Output resolution follows the first (depth) control image.
    width, height = control_image_depth.size

    image = pipe(
        prompt,
        control_image=[control_image_depth, control_image_canny],
        control_mode=[control_mode_depth, control_mode_canny],
        width=width,
        height=height,
        controlnet_conditioning_scale=[0.2, 0.4],
        num_inference_steps=24,
        guidance_scale=3.5,
        generator=torch.manual_seed(42),  # fixed seed for reproducible output
    ).images[0]
    return image


iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Text(label="Prompt"),
        gr.Image(label="Control Image (Depth)"),
        gr.Dropdown(choices=control_modes, value="depth", label="Control Mode (Depth)"),
        gr.Image(label="Control Image (Canny)"),
        gr.Dropdown(choices=control_modes, value="canny", label="Control Mode (Canny)"),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="FluxControlNet Image Generation",
    description="Generate an image using FluxControlNet with depth and canny control images.",
)

if __name__ == "__main__":
    iface.launch()