Spaces:
Runtime error
Runtime error
File size: 2,752 Bytes
2571a09 68bba7b 2571a09 42a1e5c b40804f 68bba7b b40804f 42a1e5c a826a95 68bba7b 2571a09 42a1e5c b40804f 2571a09 a826a95 2ad848e 2571a09 b40804f 2571a09 2ad848e 2571a09 a826a95 2571a09 2ad848e 68bba7b 2ad848e 2571a09 42a1e5c 2571a09 b40804f 3d77d68 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
import torch
from PIL import Image
from diffusers import AutoPipelineForText2Image, DDIMScheduler
import numpy as np
import spaces # Make sure to import spaces
# Build the SDXL text-to-image pipeline once at import time so every request
# reuses the same weights. Half precision is requested via torch_dtype.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
)

# Swap the default scheduler for DDIM, reusing the existing scheduler config.
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Load two IP-Adapters: the first is fed the style reference, the second the
# face reference (order must match the `ip_adapter_image` list at call time).
# Both checkpoints are the "vit-h" variants, so the image encoder has to come
# from `models/image_encoder` rather than the default `image_encoder` folder
# that would be resolved inside `sdxl_models`.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
    image_encoder_folder="models/image_encoder",
)

# One scale per adapter, in the same order as `weight_name` above.
pipeline.set_ip_adapter_scale([0.7, 0.5])

# Size (width, height) that input images are resized to before conditioning.
desired_size = (1024, 1024)
def _to_rgb_image(image):
    """Return *image* as an RGB PIL image.

    Accepts a PIL image or a numpy array; anything else (including ``None``)
    raises ``ValueError``.
    """
    if isinstance(image, Image.Image):
        pil_image = image
    elif isinstance(image, np.ndarray):
        pil_image = Image.fromarray(image)
    else:
        raise ValueError("Unsupported image format")
    # Drop alpha / expand grayscale so the image encoder always sees 3 channels.
    return pil_image.convert("RGB")


@spaces.GPU
def transform_image(face_image, style_image=None):
    """Generate a "soyjak" rendition of an uploaded face image.

    Args:
        face_image: The face reference, as a PIL image or a numpy array.
        style_image: Optional style reference (PIL image or numpy array).
            When omitted, the face image is reused as the style reference.
            NOTE(review): the original code referenced a `style_image` that
            was never defined; supply a real style reference here (or a
            module-level default) to get the intended look.

    Returns:
        The first image produced by the pipeline (a PIL image with the
        pipeline's default output type).

    Raises:
        ValueError: If an input is neither a PIL image nor a numpy array.
    """
    # Validate and normalise inputs before touching the GPU so bad input
    # fails fast without moving any weights.
    face_reference = _to_rgb_image(face_image).resize(desired_size, Image.LANCZOS)
    if style_image is None:
        style_reference = face_reference
    else:
        style_reference = _to_rgb_image(style_image).resize(
            desired_size, Image.LANCZOS
        )

    # Move the pipeline to the GPU inside the function
    pipeline.to("cuda")
    try:
        # Fixed seed: the same input yields the same output.
        generator = torch.Generator(device="cuda").manual_seed(0)
        # The pipeline takes PIL images directly; one entry per loaded
        # IP-Adapter, in load order (style first, face second).
        result = pipeline(
            prompt="soyjak",
            ip_adapter_image=[style_reference, face_reference],
            negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
            num_inference_steps=30,
            generator=generator,
        ).images[0]
    finally:
        # Move the pipeline back to CPU even on failure to release GPU resources
        pipeline.to("cpu")
    return result
# Gradio interface setup: a single image input wired to transform_image,
# with a single image output.
demo = gr.Interface(
    fn=transform_image,
    inputs=gr.Image(label="Upload your face image"),
    outputs=gr.Image(label="Your Soyjak"),
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image. Please use responsibly.",
)

# Cap the number of queued requests, then start the app.
demo.queue(max_size=20)
demo.launch()