# NOTE: removed file-viewer scrape residue (file-size banner, commit hashes,
# and a duplicated line-number gutter) that preceded the actual source and
# made this file invalid Python.
import os
import torch
import gradio as gr
import numpy as np
from PIL import Image
from einops import rearrange
import requests
import spaces
from huggingface_hub import login
from gradio_imageslider import ImageSlider  # Import ImageSlider

from image_datasets.canny_dataset import canny_processor, c_crop
from src.flux.sampling import denoise_controlnet, get_noise, get_schedule, prepare, unpack
from src.flux.util import load_ae, load_clip, load_t5, load_flow_model, load_controlnet, load_safetensors

# Download the ControlNet checkpoint once and cache it next to the script.
model_url = "https://huggingface.co/XLabs-AI/flux-controlnet-canny-v3/resolve/main/flux-canny-controlnet-v3.safetensors?download=true"
model_path = "./flux-canny-controlnet-v3.safetensors"
if not os.path.exists(model_path):
    # Stream the (multi-GB) download and fail loudly on HTTP errors so a
    # 404/500 HTML error page is never silently written to disk as a
    # corrupt checkpoint — the original wrote response.content unchecked.
    response = requests.get(model_url, stream=True, timeout=60)
    response.raise_for_status()
    with open(model_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            f.write(chunk)

# Configuration for the model
name = "flux-dev"
device = torch.device("cuda")
is_schnell = name == "flux-schnell"  # schnell variant uses shorter T5 prompts and no timestep shift

def preprocess_image(image, target_width, target_height, crop=True):
    """Resize *image* to exactly (target_width, target_height).

    With crop=True the image is square-cropped via c_crop, scaled up just
    enough to cover the target box, then center-cropped to the exact size.
    With crop=False it is simply stretched to the target dimensions.
    LANCZOS resampling is used in both paths.
    """
    if not crop:
        return image.resize((target_width, target_height), Image.LANCZOS)

    image = c_crop(image)  # square-crop first
    src_w, src_h = image.size
    # Scale factor that makes the image fully cover the target box.
    factor = max(target_width / src_w, target_height / src_h)
    new_w = int(factor * src_w)
    new_h = int(factor * src_h)
    image = image.resize((new_w, new_h), Image.LANCZOS)
    # Center-crop the overshoot down to the exact target dimensions.
    offset_x = (new_w - target_width) // 2
    offset_y = (new_h - target_height) // 2
    return image.crop((offset_x, offset_y, offset_x + target_width, offset_y + target_height))

def preprocess_canny_image(image, target_width, target_height, crop=True):
    """Resize/crop *image* like preprocess_image, then apply the Canny edge processor."""
    resized = preprocess_image(image, target_width, target_height, crop=crop)
    return canny_processor(resized)

@spaces.GPU(duration=120)
def generate_image(prompt, control_image, num_steps=50, guidance=4, width=512, height=512, seed=42, random_seed=False):
    """Generate an image from *prompt*, guided by the Canny edges of *control_image*.

    Returns [preprocessed control image, generated image] so the UI's
    ImageSlider can show a before/after comparison.

    NOTE(review): every model (flow model, T5, CLIP, AE, ControlNet) is
    reloaded on each call — tolerable for an ephemeral ZeroGPU worker but
    very slow; consider module-level caching if the runtime permits.
    """
    if random_seed:
        seed = np.random.randint(0, 999999999)

    if not os.path.isdir("./controlnet_results/"):
        os.makedirs("./controlnet_results/")

    torch_device = torch.device("cuda")
    torch.cuda.empty_cache()  # Clear GPU cache left over from a previous call

    model = load_flow_model(name, device=torch_device)
    t5 = load_t5(torch_device, max_length=256 if is_schnell else 512)
    clip = load_clip(torch_device)
    ae = load_ae(name, device=torch_device)
    controlnet = load_controlnet(name, torch_device).to(torch_device).to(torch.bfloat16)

    # strict=False: the checkpoint may omit buffers present in the module definition.
    checkpoint = load_safetensors(model_path)
    controlnet.load_state_dict(checkpoint, strict=False)

    # Round dimensions DOWN to a multiple of 16 as the latent packing requires.
    # BUG FIX: the original `16 * width // 16` was a no-op because of operator
    # precedence (16*w//16 == w for any int w); the parentheses make the
    # rounding actually happen for non-multiple-of-16 inputs.
    width = 16 * (width // 16)
    height = 16 * (height // 16)
    timesteps = get_schedule(num_steps, (width // 8) * (height // 8) // (16 * 16), shift=(not is_schnell))

    # Preprocess once for display, once more through the Canny edge detector
    # to build the ControlNet conditioning tensor in [-1, 1].
    processed_input = preprocess_image(control_image, width, height)
    canny_processed = preprocess_canny_image(control_image, width, height)
    controlnet_cond = torch.from_numpy((np.array(canny_processed) / 127.5) - 1)
    controlnet_cond = controlnet_cond.permute(2, 0, 1).unsqueeze(0).to(torch.bfloat16).to(torch_device)

    torch.manual_seed(seed)
    with torch.no_grad():
        x = get_noise(1, height, width, device=torch_device, dtype=torch.bfloat16, seed=seed)
        inp_cond = prepare(t5=t5, clip=clip, img=x, prompt=prompt)

        x = denoise_controlnet(model, **inp_cond, controlnet=controlnet, timesteps=timesteps, guidance=guidance, controlnet_cond=controlnet_cond)

        x = unpack(x.float(), height, width)
        x = ae.decode(x)

    # Map decoded latents from [-1, 1] back to uint8 pixels.
    x1 = x.clamp(-1, 1)
    x1 = rearrange(x1[-1], "c h w -> h w c")
    output_img = Image.fromarray((127.5 * (x1 + 1.0)).cpu().byte().numpy())

    return [processed_input, output_img]  # Return both images for slider

def update_value(name, value):
    """Render a labelled value as the string "<name>: <value>"."""
    return "{}: {}".format(name, value)

# Gradio UI wiring: the input widgets map positionally onto
# generate_image's parameters (prompt, control_image, num_steps, guidance,
# width, height, seed, random_seed) — keep the orders in sync.
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Image(type="pil", label="Control Image"),
        gr.Slider(step=1, minimum=1, maximum=64, value=28, label="Num Steps", interactive=True, info="Steps for the generation"),
        gr.Slider(minimum=0.1, maximum=10, value=4, label="Guidance", interactive=True, info="Guidance scale"),
        gr.Slider(minimum=128, maximum=1024, step=128, value=512, label="Width", interactive=True, info="Image width"),
        gr.Slider(minimum=128, maximum=1024, step=128, value=512, label="Height", interactive=True, info="Image height"),
        gr.Slider(value=42, minimum=0, maximum=99999999, step=1, label="Seed", interactive=True, info="Random seed"),
        gr.Checkbox(label="Random Seed")
    ],
    outputs=ImageSlider(label="Before / After"),  # Use ImageSlider as the output
    title="FLUX.1 Controlnet Canny",
    description="Generate images using ControlNet and a text prompt.\n[[non-commercial license, Flux.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)]",
    # NOTE(review): live=True re-runs generation on every input change,
    # which is very expensive for a GPU diffusion pipeline — confirm this
    # is intended rather than an explicit submit button.
    live=True,
)

if __name__ == "__main__":
    # Launch the Gradio app only when executed as a script (not on import).
    interface.launch()