import gradio as gr
from PIL import Image
import numpy as np
from diffusers import AutoPipelineForText2Image
import torch
import os
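
# Gradio demo: generate building-layout images from a text prompt with a
# Stable Diffusion Nano pipeline plus LoRA weights, regenerating until the
# result meets the selected floor-area ratio requirement.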
# class SaveImageEveryNStepsCallback:
#     def __init__(self, output_dir, total_steps, interval):
#         self.output_dir = output_dir
#         self.interval = interval
#         if not os.path.exists(output_dir):
#             os.makedirs(output_dir)
#         self.step_list = set(range(0, total_steps, interval))
#
#     def __call__(self, scheduler, **kwargs):
#         current_step = kwargs["step"]
#         if current_step in self.step_list:
#             image = kwargs["sample"].detach().cpu().squeeze().permute(1, 2, 0)
#             image = (image + 1) / 2  # normalize to [0, 1]
#             image = image.clamp(0, 1) * 255  # scale to 0-255
#             image = image.numpy().astype("uint8")
#             image_path = os.path.join(self.output_dir, f"image_at_step_{current_step}.png")
#             Image.fromarray(image).save(image_path)
#             print(f"Image saved at step {current_step}")
output_dir = "./saved_images"
# def save_image_callback(pipeline, i, t, latents, **kwargs):
#     interval = 5  # save an image every 5 steps
#     if i % interval == 0:
#         # Convert latents to an image
#         image = pipeline.decode_latents_to_image(latents)  # adjust method call to the actual API
#         image = (image + 1) / 2 * 255
#         image = image.clip(0, 255).astype(np.uint8)
#         image = Image.fromarray(image)
#         # Save the image
#         image_path = os.path.join(output_dir, f"image_at_step_{i}.png")
#         image.save(image_path)
#         print(f"Image saved at step {i}")
def latents_to_rgb(latents):
    """Cheap preview: project 4-channel SD latents to RGB with a fixed linear map."""
    weights = (
        (60, -60, 25, -70),
        (60, -5, 15, -50),
        (60, 10, -5, -35),
    )
    weights_tensor = torch.t(torch.tensor(weights, dtype=latents.dtype).to(latents.device))
    biases_tensor = torch.tensor((150, 140, 130), dtype=latents.dtype).to(latents.device)
    rgb_tensor = torch.einsum("...lxy,lr -> ...rxy", latents, weights_tensor) + biases_tensor.unsqueeze(-1).unsqueeze(-1)
    image_array = rgb_tensor.clamp(0, 255).byte().cpu().numpy().transpose(1, 2, 0)
    return Image.fromarray(image_array)
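
# Example (assumed shapes, for illustration only): SD 2.x latents have 4
# channels at 1/8 of the pixel resolution, so latents_to_rgb(torch.randn(4, 64, 64))
# returns a 64x64 RGB PIL preview of a 512x512 generation.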
# Callback to dump a latent-space preview image at the end of every step
def decode_tensors(pipe, step, timestep, callback_kwargs):
    latents = callback_kwargs["latents"]
    image = latents_to_rgb(latents[0])
    os.makedirs("./output_images", exist_ok=True)  # make sure the target directory exists
    image.save(f"./output_images/{step}.png")
    return callback_kwargs
# Load the pretrained base model and the fine-tuned LoRA weights
pipeline = AutoPipelineForText2Image.from_pretrained("bguisard/stable-diffusion-nano-2-1", torch_dtype=torch.float16).to("cuda")
pipeline.load_lora_weights("/root/autodl-tmp/Proj/city_demo/checkpoint-15000", weight_name="pytorch_lora_weights.safetensors")
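# NOTE: the checkpoint path above is machine-specific; point it at your own
# LoRA checkpoint directory when running this demo elsewhere.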
def generate_image(text, option):
    num_steps = 50
    interval = num_steps // 10
    output_dir = "./intermediate_images"
    # callback = SaveImageEveryNStepsCallback(output_dir, num_steps, interval)
    # generator = torch.manual_seed(42)
    # image = pipeline(text, num_inference_steps=num_steps, generator=generator,
    #                  callback_on_step_end=decode_tensors,
    #                  callback_on_step_end_tensor_inputs=["latents"])
    # Regenerate until the image meets the selected ratio requirement
    while True:
        image = pipeline(text, num_inference_steps=num_steps)
        final_image = image.images[0]
        if option == "Ratio < 5":
            if calculate_building_ratio(final_image) < 5:
                final_pil_image = final_image.convert('L')
                return final_pil_image
        else:
            if calculate_building_ratio(final_image) >= 5:
                final_pil_image = final_image.convert('L')
                return final_pil_image
        # final_pil_image = Image.fromarray((final_image.cpu().numpy() * 255).astype('uint8'))
        # # Save as JPEG
        # image_path = os.path.join(output_dir, "final_image.jpg")
        # final_pil_image.save(image_path, "JPEG")
# def generate_image(text):
#     # Use a hard-coded image path
#     image_path = '/root/autodl-tmp/Proj/city_diffusion_demo/images/beijing/beijing_0.png'
#     # Load the image
#     image = Image.open(image_path)
#     return image
def check_requirements(image, requirement):
    # Check the image against the selected requirement.
    # The concrete checks still need to be implemented for each requirement.
    if requirement == "Option 1":
        # check condition 1
        pass
    elif requirement == "Option 2":
        # check condition 2
        pass
    return True  # placeholder: always passes
def generate_compliant_image(text, requirements):
    while True:
        image = generate_image(text, requirements)  # pass the requirement through as the ratio option
        if check_requirements(image, requirements):
            break
    return image
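
# NOTE: check_requirements and generate_compliant_image are leftover scaffolding;
# the Gradio interface below calls generate_image directly, which already loops
# until the ratio requirement is satisfied.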
def calculate_building_ratio(image):
    # Convert the image to grayscale and to a NumPy array
    # img = Image.open(image_path).convert('L')
    img_array = np.array(image.convert('L'))
    # The building footprint is defined as all non-white pixels
    building_area = np.count_nonzero(img_array != 255)
    # # Find the highest and lowest rows containing buildings to estimate height
    # non_zero_rows = np.nonzero(img_array)[0]
    # if non_zero_rows.size == 0:
    #     return 0  # no buildings present
    # min_row, max_row = np.min(non_zero_rows), np.max(non_zero_rows)
    # height = max_row - min_row + 1
    height = np.sum(img_array[img_array != 255])
    print(height)
    print(img_array[img_array != 255])
    # Estimate the number of floors, assuming each floor is 3 meters high
    floors = height / 3
    # The plot's total area is the area of the whole image
    total_area = img_array.size
    # Ratio = building footprint x floors / total plot area
    ratio = floors / total_area
    print(ratio)
    return ratio / 10
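
# Quick sanity check of the heuristic (kept commented out, matching the other
# experiments in this file): an all-white image has no building pixels, so the
# ratio should be 0.
# blank = Image.fromarray(np.full((64, 64), 255, dtype=np.uint8))
# assert calculate_building_ratio(blank) == 0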
iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=["Ratio < 5", "Ratio >= 5"], label="Select Ratio Requirement")
    ],
    outputs="image",
    title="Building Image Generation",
    description="Enter text and specify requirements for the generated image. The image will be regenerated until it meets the requirements."
)
iface.launch(share=True)