Spaces:

emban362
/

hackathon-2025

Running on Zero

App Files Files Community

Andre Embury committed on May 1

Commit

8297189

unverified ·

1 Parent(s): 84d59ce

First test with ControlNet Union

Browse files

Take inspiration:
https://github.com/xinsir6/ControlNetPlus/blob/main/controlnet_union_test_canny.py

Files changed (1) hide show

app.py +63 -17

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import numpy as np
 from diffusers import (
     # StableDiffusionControlNetImg2ImgPipeline,
     ControlNetModel,
     StableDiffusionXLControlNetPipeline,
 )
 import torch
@@ -14,9 +15,14 @@ import torch
 import requests
 from fastapi import FastAPI, HTTPException
 from PIL import Image
-# from controlnet_aux import CannyDetector
-from controlnet_aux import ScribbleDetector
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use
@@ -48,23 +54,36 @@ else:
 #     variant="fp16",
 #     use_safetensors=True,
 # ).to(device)
-# Load SDXL-compatible ControlNet (scribble version)
-controlnet = ControlNetModel.from_pretrained(
-    "diffusers/controlnet-scribble-sdxl-1.0", torch_dtype=torch.float16
 )
-# Load SDXL base pipeline with the ControlNet
-pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
-    controlnet=controlnet,
     torch_dtype=torch.float16,
-    variant="fp16",
-    use_safetensors=True,
-).to(device)
-# canny = CannyDetector()
-scribble_detector = ScribbleDetector()
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
@@ -111,10 +130,17 @@ def infer(
     # img = Image.open(io.BytesIO(resp.content)).convert("RGB")
     img = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
     # img = img.resize((req.width, req.height))
-    img = img.resize((width, height))
     # control_net_image = canny(img).resize((width, height))
-    control_net_image = scribble_detector(img).resize((width, height))
     prompt = (
         "redraw the logo from scratch, clean sharp vector-style, "
@@ -124,8 +150,8 @@ def infer(
     output = pipe(
         prompt=prompt,
         negative_prompt=NEGATIVE,
-        image=img,
-        control_image=control_net_image,
         # strength=req.strength,
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
@@ -153,6 +179,26 @@ NEGATIVE = "blurry, distorted, messy, gradients, background noise"
 WIDTH = 512
 HEIGHT = 512
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown(" # Text-to-Image Gradio Template")

 from diffusers import (
     # StableDiffusionControlNetImg2ImgPipeline,
     ControlNetModel,
+    ControlNetUnionModel,
     StableDiffusionXLControlNetPipeline,
 )
 import torch
 import requests
 from fastapi import FastAPI, HTTPException
 from PIL import Image
+from controlnet_aux import CannyDetector
+from diffusers import AutoencoderKL
+from diffusers import (
+    EulerAncestralDiscreteScheduler,
+    StableDiffusionXLControlNetUnionPipeline,
+)
+import cv2
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use
 #     variant="fp16",
 #     use_safetensors=True,
 # ).to(device)
+# # pipe = pipe.to(device)
+# canny = CannyDetector()
+eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
 )
+# When testing with a different base model, the VAE must be changed as well.
+vae = AutoencoderKL.from_pretrained(
+    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
+)
+controlnet_model = ControlNetUnionModel.from_pretrained(
+    "xinsir/controlnet-union-sdxl-1.0", torch_dtype=torch.float16, use_safetensors=True
+)
+# controlnet_union_model = ControlNetUnionModel([controlnet_model])
+pipe = StableDiffusionXLControlNetUnionPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
+    controlnet=controlnet_model,
+    vae=vae,
     torch_dtype=torch.float16,
+    scheduler=eulera_scheduler,
+    control_mode=[0],
+)
+pipe = pipe.to(device)
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
     # img = Image.open(io.BytesIO(resp.content)).convert("RGB")
     img = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
     # img = img.resize((req.width, req.height))
+    # img = img.resize((width, height))
     # control_net_image = canny(img).resize((width, height))
+    img_np = np.array(img)
+    controlnet_img = cv2.resize(img_np, (width, height))
+    controlnet_img = cv2.Canny(controlnet_img, 100, 200)
+    controlnet_img = HWC3(controlnet_img)
+    controlnet_img = Image.fromarray(controlnet_img)
     prompt = (
         "redraw the logo from scratch, clean sharp vector-style, "
     output = pipe(
         prompt=prompt,
         negative_prompt=NEGATIVE,
+        # image=img,
+        control_image=controlnet_img,
         # strength=req.strength,
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
 WIDTH = 512
 HEIGHT = 512
def HWC3(x):
    """Return *x* as a 3-channel ``uint8`` image in height x width x channel layout.

    Accepted inputs are ``uint8`` NumPy arrays that are either 2-D
    (treated as a single channel) or 3-D with 1, 3 or 4 channels on the
    last axis:

    * 3 channels: returned unchanged (the same array object).
    * 1 channel: the channel is replicated three times.
    * 4 channels: the first three channels are blended over a white
      background using the fourth channel as alpha (0-255).

    Raises:
        TypeError: if *x* is not a NumPy array of dtype ``uint8``.
        ValueError: if *x* is not 2-D/3-D or has an unsupported channel count.
    """
    # Explicit checks instead of ``assert``: assertions are removed under
    # ``python -O``, which would let an unsupported channel count fall
    # through every branch and silently return ``None``.
    if not isinstance(x, np.ndarray) or x.dtype != np.uint8:
        raise TypeError("HWC3 expects a numpy.ndarray with dtype uint8")

    if x.ndim == 2:
        # Promote a single-channel H x W image to H x W x 1.
        x = x[:, :, None]
    if x.ndim != 3:
        raise ValueError(
            f"HWC3 expects a 2-D or 3-D array, got {x.ndim} dimension(s)"
        )

    channels = x.shape[2]
    if channels == 3:
        return x
    if channels == 1:
        return np.concatenate([x, x, x], axis=2)
    if channels == 4:
        # Alpha-composite the colour channels over white (255).
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        blended = color * alpha + 255.0 * (1.0 - alpha)
        return blended.clip(0, 255).astype(np.uint8)

    raise ValueError(f"HWC3 expects 1, 3 or 4 channels, got {channels}")
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown(" # Text-to-Image Gradio Template")