File size: 2,029 Bytes
683afc3
c1497a6
0737dc8
74c4e79
9754bfe
f5ffe3a
 
 
 
feede18
4fbc46c
c1497a6
683afc3
cd0d25d
f5ffe3a
 
 
 
683afc3
f5ffe3a
 
 
 
 
 
b12bc82
f5ffe3a
 
 
bcbf6e0
0737dc8
74c4e79
5a5a07a
f5ffe3a
 
 
 
 
 
 
 
 
 
 
 
 
0737dc8
f5ffe3a
683afc3
7968596
 
 
 
 
feede18
7968596
 
 
 
 
9754bfe
7968596
683afc3
7968596
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
from huggingface_hub import login
import os
import spaces
import torch
from diffusers import StableDiffusionXLPipeline
from PIL import Image

from ip_adapter import IPAdapterXL

# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login(token=None) falls back to an interactive prompt, which a
# headless deployment cannot answer.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)


# Model locations: the base model is a Hub repo id; the image encoder and
# IP-Adapter checkpoint are paths relative to the working directory.
base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
image_encoder_path = "sdxl_models/image_encoder"
ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
device = "cuda"

# Load the SDXL pipeline in half precision, without the output watermarker.
pipe = StableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    add_watermarker=False,
)

# Reduce memory consumption during VAE decoding.
pipe.enable_vae_tiling()

# Wrap the pipeline with the IP-Adapter.
# NOTE(review): target_blocks presumably restricts which UNet attention
# blocks receive the image conditioning - confirm against ip_adapter.
ip_model = IPAdapterXL(
    pipe,
    image_encoder_path,
    ip_ckpt,
    device,
    target_blocks=["up_blocks.0.attentions.1"],
)


@spaces.GPU
def generate_image(prompt, reference_image, controlnet_conditioning_scale):
    """Generate one image from a text prompt and a reference image.

    Args:
        prompt: Text prompt forwarded to ``ip_model.generate``.
        reference_image: Filesystem path of the reference image (the Gradio
            image input is configured with ``type="filepath"``).
        controlnet_conditioning_scale: Value forwarded as the ``scale``
            argument of ``ip_model.generate``.

    Returns:
        The first (and only requested) image returned by
        ``ip_model.generate``.

    Raises:
        gr.Error: If no reference image was supplied.
    """
    if reference_image is None:
        raise gr.Error("Please provide a reference image.")

    # Image.resize returns a new image instead of resizing in place, so the
    # result must be kept. The context manager closes the source file handle
    # once the resized copy has been produced.
    with Image.open(reference_image) as source_image:
        image = source_image.resize((512, 512))

    images = ip_model.generate(
        pil_image=image,
        prompt=prompt,
        negative_prompt="",
        scale=controlnet_conditioning_scale,
        guidance_scale=5,
        num_samples=1,
        num_inference_steps=30,
        seed=42,  # fixed seed: identical inputs request the same sample
    )

    return images[0]

# Set up the Gradio interface.
# The title, description and slider label name the models this script actually
# loads (Stable Diffusion XL with IP-Adapter); the slider value is forwarded
# as the adapter's `scale` argument by generate_image.
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Image(type="filepath", label="Reference Image (Style)"),
        gr.Slider(label="IP-Adapter Scale", minimum=0, maximum=1.0, step=0.1, value=0.6),
    ],
    outputs="image",
    title="Image Generation with Stable Diffusion XL and IP-Adapter",
    description=(
        "Generates an image based on a text prompt and a reference image "
        "using Stable Diffusion XL with IP-Adapter."
    ),
)

interface.launch()