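"""Helpers for the StyleCodes Space: encode a reference image into a compact
stylecode with a SigLIP vision encoder, then condition Stable Diffusion 1.5
generation on that code through a ControlNet-XS-style appearance model."""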
import random

import torch
from PIL import Image
from diffusers import DDIMScheduler
from diffusers.models import UNet2DConditionModel
from transformers import AutoProcessor, SiglipVisionModel

from controlnet.pipline_controlnet_xs_v2 import StableDiffusionPipelineXSv2
from controlnet.controlnetxs_appearance import StyleCodesModel
def use_stylecode(model, image_path, prompt, negative_prompt, num_inference_steps, stylecode, seed=None, image=None):
    """Generate images conditioned on a precomputed stylecode."""
    # Set up model components
    unet = UNet2DConditionModel.from_pretrained(
        "runwayml/stable-diffusion-v1-5", subfolder="unet", torch_dtype=torch.float16
    )
    stylecodes_model = StyleCodesModel.from_unet(unet, size_ratio=1.0).to(dtype=torch.float16, device="cuda")
    print(f"running prompt = {prompt} negative_prompt = {negative_prompt} with code {stylecode} and seed {seed}")
    stylecodes_model.load_model(model)

    pipe = StableDiffusionPipelineXSv2.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        unet=unet,
        stylecodes_model=stylecodes_model,
        torch_dtype=torch.float16,
        feature_extractor=None,
        safety_checker=None,
    )
    pipe.enable_model_cpu_offload()

    # Load and preprocess the reference image if one was not passed in
    if image is None:
        image = Image.open(image_path).convert("RGB")
    image = image.resize((512, 512))

    # Seed the generator for reproducibility; otherwise draw a random seed
    if seed is not None and seed != -1:
        generator = torch.Generator(device="cuda").manual_seed(seed)
    else:
        random_seed = random.randint(0, 2**32 - 1)
        print(f"using random seed {random_seed}")
        generator = torch.Generator(device="cuda").manual_seed(random_seed)

    # Run the pipeline with the specified prompt and stylecode
    output_images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=3,
        # image=image,
        num_inference_steps=num_inference_steps,
        generator=generator,
        controlnet_conditioning_scale=0.9,
        width=512,
        height=512,
        stylecode=stylecode,
    ).images
    return output_images
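
# A hypothetical call (checkpoint and image paths are placeholders; the
# stylecode comes from make_stylecode below):
#   images = use_stylecode(
#       "models/stylecodes_sd15.safetensors", "reference.jpg",
#       "an oil painting of a harbor", "blurry, low quality",
#       num_inference_steps=20, stylecode=code, seed=42)
#   images[0].save("styled.png")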
def process_single_image_both_ways(model, image_path, prompt, num_inference_steps, image=None):
    """Run an image through the pipeline without an explicit stylecode."""
    # Set up model components
    unet = UNet2DConditionModel.from_pretrained(
        "runwayml/stable-diffusion-v1-5", subfolder="unet", torch_dtype=torch.float16
    )
    stylecodes_model = StyleCodesModel.from_unet(unet, size_ratio=1.0).to(dtype=torch.float16, device="cuda")
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
        steps_offset=1,
    )
    stylecodes_model.load_model(model)

    pipe = StableDiffusionPipelineXSv2.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        unet=unet,
        stylecodes_model=stylecodes_model,
        torch_dtype=torch.float16,
        # scheduler=noise_scheduler,
        feature_extractor=None,
        safety_checker=None,
    )
    pipe.enable_model_cpu_offload()

    # Load and preprocess image
    if image is None:
        image = Image.open(image_path).convert("RGB")
    image = image.resize((512, 512))

    # Set up generator with a fixed seed for reproducibility
    seed = 238
    generator = torch.Generator(device="cuda").manual_seed(seed)

    # Run the image through the pipeline with the specified prompt
    output_images = pipe(
        prompt=prompt,
        guidance_scale=3,
        image=image,
        num_inference_steps=num_inference_steps,
        generator=generator,
        controlnet_conditioning_scale=0.9,
        width=512,
        height=512,
        stylecode=None,
    ).images
    return output_images
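
# A hypothetical sanity check (paths are placeholders): reconstruct a
# reference image with a fixed seed and no stylecode:
#   images = process_single_image_both_ways(
#       "models/stylecodes_sd15.safetensors", "reference.jpg",
#       "a portrait photo", num_inference_steps=20)
#   images[0].save("reconstruction.png")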
def make_stylecode(model, image_path, image=None):
    """Encode an image into a stylecode with the SigLIP vision encoder."""
    # Set up model components
    unet = UNet2DConditionModel.from_pretrained(
        "runwayml/stable-diffusion-v1-5", subfolder="unet", torch_dtype=torch.float16
    )
    stylecodes_model = StyleCodesModel.from_unet(unet, size_ratio=1.0).to(dtype=torch.float16, device="cuda")
    stylecodes_model.requires_grad_(False)
    stylecodes_model.load_model(model)

    # Load and preprocess image
    if image is None:
        image = Image.open(image_path).convert("RGB")
    image = image.resize((512, 512))

    # Embed the image with SigLIP; the second-to-last hidden state is used
    clip_image_processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
    image_encoder = SiglipVisionModel.from_pretrained("google/siglip-base-patch16-224").to(
        dtype=torch.float16, device=stylecodes_model.device
    )
    clip_image = clip_image_processor(images=image, return_tensors="pt").pixel_values
    clip_image = clip_image.to(stylecodes_model.device, dtype=torch.float16)
    clip_image_embeds = image_encoder(pixel_values=clip_image, output_hidden_states=True).hidden_states[-2]

    # Compress the embedding into a compact stylecode
    code = stylecodes_model.sref_autoencoder.make_stylecode(clip_image_embeds)
    print(f"stylecode = {code}")
    return code
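
if __name__ == "__main__":
    # Minimal end-to-end sketch, assuming a local checkpoint and input image
    # (both filenames are placeholders, not shipped with this Space):
    # extract a stylecode from a reference image, then generate a new image
    # in that style.
    checkpoint = "models/stylecodes_sd15.safetensors"  # hypothetical path
    code = make_stylecode(checkpoint, "reference.jpg")
    images = use_stylecode(
        checkpoint,
        "reference.jpg",
        prompt="a lighthouse at dusk",
        negative_prompt="blurry, low quality",
        num_inference_steps=20,
        stylecode=code,
        seed=238,
    )
    images[0].save("stylecode_output.png")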