import os
import torch
import gc
import gradio as gr
import numpy as np
from PIL import Image
from einops import rearrange
import io
import requests
import spaces
from huggingface_hub import login
from gradio_imageslider import ImageSlider
from diffusers.utils import load_image
from diffusers import FluxControlNetPipeline, FluxControlNetModel
# Device settings: CPU for loading, GPU for inference
device_cpu = torch.device("cpu")
device_gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
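# Note: the pipeline stays on the CPU between requests and is moved to the GPU only inside
# the @spaces.GPU-decorated handler below (ZeroGPU attaches a GPU per call, here requested for up to 120 s).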
# Model identifiers
base_model = 'black-forest-labs/FLUX.1-dev'
controlnet_model = 'InstantX/FLUX.1-dev-Controlnet-Union'
# Load the ControlNet model and pipeline on CPU
controlnet = FluxControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16).to(device_cpu)
pipe = FluxControlNetPipeline.from_pretrained(base_model, controlnet=controlnet, torch_dtype=torch.bfloat16).to(device_cpu)
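# Weight applied to the ControlNet conditioning; lower values follow the control image more loosely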
controlnet_conditioning_scale = 0.5
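# Mode indices passed to the Union ControlNet's `control_mode` argument
# (ordering per the InstantX model card; "lq" = low quality)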
control_modes = {
    "canny": 0,
    "tile": 1,
    "depth": 2,
    "blur": 3,
    "pose": 4,
    "gray": 5,
    "lq": 6,
}
def load_and_convert_image(image):
    """Load an image from a path or raw bytes and return a PIL image."""
    if isinstance(image, str):
        image = Image.open(image)
    elif isinstance(image, bytes):
        image = Image.open(io.BytesIO(image))
    # Normalize AVIF images to RGB so the rest of the pipeline can handle them
    if image.format == 'AVIF':
        image = image.convert("RGB")
    return image
def preprocess_image(image, target_width, target_height, crop=True):
    """Preprocess the image to match the target dimensions."""
    image = load_and_convert_image(image)
    if crop:
        original_width, original_height = image.size
        # Resize to cover the target size without stretching
        scale = max(target_width / original_width, target_height / original_height)
        resized_width = int(scale * original_width)
        resized_height = int(scale * original_height)
        image = image.resize((resized_width, resized_height), Image.LANCZOS)
        # Center crop to match the target dimensions
        left = (resized_width - target_width) // 2
        top = (resized_height - target_height) // 2
        image = image.crop((left, top, left + target_width, top + target_height))
    else:
        image = image.resize((target_width, target_height), Image.LANCZOS)
    return image
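# Example: preprocess_image(img, 1024, 1024) scales `img` to cover 1024x1024 and center-crops it;
# preprocess_image(img, 1024, 1024, crop=False) stretches it to the exact size instead.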
def clear_cuda_memory():
    """Clear CUDA memory."""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
@spaces.GPU(duration=120)
def generate_image(prompt, control_image, control_mode, num_steps=50, guidance=4, width=512, height=512, seed=42, random_seed=False):
    """Generate an image using the FLUX.1 ControlNet model."""
    clear_cuda_memory()
    if random_seed:
        seed = np.random.randint(0, 10000)
    if not os.path.isdir("./controlnet_results/"):
        os.makedirs("./controlnet_results/")
    # Move the pipeline to the GPU for inference
    pipe.to(device_gpu)
    control_image = preprocess_image(control_image, width, height)
    torch.manual_seed(seed)
    with torch.no_grad():
        image = pipe(
            prompt,
            control_image=control_image,
            control_mode=control_modes[control_mode],
            width=width,
            height=height,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            num_inference_steps=num_steps,
            guidance_scale=guidance,
        ).images[0]
    # Move the pipeline back to the CPU after inference
    pipe.to(device_cpu)
    return [control_image, image]  # Return both images for the slider
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Image(type="pil", label="Control Image"),
        gr.Dropdown(choices=list(control_modes.keys()), label="Control Mode", value="canny"),
        gr.Slider(step=1, minimum=1, maximum=64, value=28, label="Num Steps"),
        gr.Slider(minimum=0.1, maximum=10, value=4, label="Guidance"),
        gr.Slider(minimum=128, maximum=2048, step=128, value=1024, label="Width"),
        gr.Slider(minimum=128, maximum=2048, step=128, value=1024, label="Height"),
        gr.Number(value=42, label="Seed"),
        gr.Checkbox(label="Random Seed"),
    ],
    outputs=ImageSlider(label="Before / After"),  # Use ImageSlider as the output
    title="FLUX.1 ControlNet Union",
    description="Generate images using ControlNet and a text prompt.\n[[non-commercial license, Flux.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)]",
)
if __name__ == "__main__":
    interface.launch(share=True)