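# NOTE: the lines below appear to be file-viewer header text captured along
# with the source; they are kept as comments so the module parses.
# test_gradio / app.py
# amos1088's picture
# test gradio
# 6c3f566
# raw
# history blame
# 2.79 kB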
import os

import gradio as gr
import torch
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    SD3ControlNetModel,  # ControlNet class for the SD3 model family
    SD3Transformer2DModel,  # Replacing UNet with SD3 transformer
    StableDiffusion3ControlNetPipeline,  # SD3 pipeline that accepts a ControlNet
    StableDiffusion3Pipeline,  # For SD3 models like Stable Diffusion 3.5
    UniPCMultistepScheduler,
)
from huggingface_hub import login
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
# Log in to Hugging Face with the token from the environment, when one is set.
# login(token=None) falls back to an interactive prompt, which a headless
# deployment cannot answer, so the call is skipped if HF_TOKEN is missing.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)

# Model IDs for the base Stable Diffusion model and the ControlNet variant.
model_id = "stabilityai/stable-diffusion-3.5-large-turbo"
# NOTE(review): the previous ID ("lllyasviel/control_v11p_sd15_inpaint") is an
# SD 1.5 ControlNet and cannot be attached to an SD3 transformer. The SD3.5
# blur ControlNet is used here because it presumably conditions on a plain RGB
# image with no extra preprocessing -- confirm this is the control type the
# app should use, and that it behaves well with the turbo base checkpoint.
controlnet_id = "stabilityai/stable-diffusion-3.5-large-controlnet-blur"

# Side length (pixels) used for both the control image and the output.
IMAGE_SIZE = 512

# Half precision only when a GPU is present; fall back to float32 on CPU.
has_cuda = torch.cuda.is_available()
dtype = torch.float16 if has_cuda else torch.float32

# Load the ControlNet, then let from_pretrained assemble the remaining
# components (transformer, VAE, text encoders, tokenizers, scheduler) from the
# model repository. Building StableDiffusion3Pipeline by hand with
# `controlnet=`, `feature_extractor=` and `torch_dtype=` fails because its
# constructor does not accept those keywords.
controlnet = SD3ControlNetModel.from_pretrained(controlnet_id, torch_dtype=dtype)
pipeline = StableDiffusion3ControlNetPipeline.from_pretrained(
    model_id,
    controlnet=controlnet,
    torch_dtype=dtype,
)

# With a GPU, enable model CPU offloading for memory optimization. The
# pipeline is deliberately NOT moved to "cuda" first: offloading manages
# device placement itself. Without a GPU the pipeline simply stays on the CPU.
if has_cuda:
    pipeline.enable_model_cpu_offload()


# Gradio interface function
def generate_image(prompt, reference_image):
    """Generate one image from a text prompt and a reference image.

    Args:
        prompt: Text prompt passed to the pipeline.
        reference_image: PIL image supplied by the Gradio image input; it is
            ``None`` when the user has not uploaded anything.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no reference image was provided.
    """
    if reference_image is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload a reference image.")

    # Resize and prepare the reference image used as ControlNet conditioning.
    control_image = reference_image.convert("RGB").resize((IMAGE_SIZE, IMAGE_SIZE))

    # The SD3 ControlNet pipeline takes the conditioning image as
    # `control_image` (not `image`). height/width are passed explicitly so the
    # output matches the resized control image rather than whatever resolution
    # the pipeline would default to.
    # NOTE(review): guidance_scale/num_inference_steps are kept from the
    # original; turbo checkpoints are usually run with far fewer steps and
    # little or no guidance -- confirm the intended values.
    result = pipeline(
        prompt=prompt,
        control_image=control_image,
        height=IMAGE_SIZE,
        width=IMAGE_SIZE,
        controlnet_conditioning_scale=1.0,
        guidance_scale=7.5,
        num_inference_steps=50,
    )
    return result.images[0]
# Build the Gradio UI: a prompt box and a reference image in, one image out.
prompt_input = gr.Textbox(label="Prompt")
reference_input = gr.Image(type="pil", label="Reference Image (Style)")

interface = gr.Interface(
    fn=generate_image,
    inputs=[prompt_input, reference_input],
    outputs="image",
    title="Image Generation with ControlNet (Reference-Only Style Transfer)",
    description="Generates an image based on a text prompt and style reference image using Stable Diffusion 3.5 and ControlNet (reference-only mode).",
)

# Start serving the app.
interface.launch()