import gradio as gr
from PIL import Image
import numpy as np
from diffusers import AutoPipelineForText2Image
import torch
import os
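
# Gradio demo: generate grayscale building-layout images from a text prompt with a
# LoRA-finetuned Stable Diffusion Nano pipeline, regenerating until the image meets
# the plot-ratio requirement selected in the UI.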
# class SaveImageEveryNStepsCallback:
#     def __init__(self, output_dir, total_steps, interval):
#         self.output_dir = output_dir
#         self.interval = interval
#         if not os.path.exists(output_dir):
#             os.makedirs(output_dir)
#         self.step_list = set(range(0, total_steps, interval))

#     def __call__(self, scheduler, **kwargs):
#         current_step = kwargs["step"]
#         if current_step in self.step_list:
#             image = kwargs["sample"].detach().cpu().squeeze().permute(1, 2, 0)
#             image = (image + 1) / 2  # normalize image
#             image = image.clamp(0, 1) * 255  # scale to 0-255
#             image = image.numpy().astype("uint8")
#             image_path = os.path.join(self.output_dir, f"image_at_step_{current_step}.png")
#             Image.fromarray(image).save(image_path)
#             print(f"Image saved at step {current_step}")
output_dir = "./saved_images"
# def save_image_callback(pipeline, i, t, latents, **kwargs):
#     interval = 5  # Save an image every 5 steps
#     if i % interval == 0:
#         # Convert latents to image
#         image = pipeline.decode_latents_to_image(latents)  # Adjust method call according to actual API
#         image = (image + 1) / 2 * 255
#         image = image.clip(0, 255).astype(np.uint8)
#         image = Image.fromarray(image)
#         # Save the image
#         image_path = os.path.join(output_dir, f"image_at_step_{i}.png")
#         image.save(image_path)
#         print(f"Image saved at step {i}")
        
def latents_to_rgb(latents):
    # Cheap preview of Stable Diffusion latents: project the 4 latent channels to RGB
    # with fixed linear weights and biases instead of running a full VAE decode.
    weights = (
        (60, -60, 25, -70),
        (60, -5, 15, -50),
        (60, 10, -5, -35),
    )
    weights_tensor = torch.t(torch.tensor(weights, dtype=latents.dtype).to(latents.device))
    biases_tensor = torch.tensor((150, 140, 130), dtype=latents.dtype).to(latents.device)
    rgb_tensor = torch.einsum("...lxy,lr -> ...rxy", latents, weights_tensor) + biases_tensor.unsqueeze(-1).unsqueeze(-1)
    image_array = rgb_tensor.clamp(0, 255).byte().cpu().numpy().transpose(1, 2, 0)
    return Image.fromarray(image_array)

# Callback that saves an RGB preview of the latents at the end of each denoising step
def decode_tensors(pipe, step, timestep, callback_kwargs):
    latents = callback_kwargs["latents"]
    image = latents_to_rgb(latents[0])
    os.makedirs("./output_images", exist_ok=True)  # make sure the output directory exists
    image.save(f"./output_images/{step}.png")
    return callback_kwargs
# Load the pretrained model and the LoRA weights
pipeline = AutoPipelineForText2Image.from_pretrained("bguisard/stable-diffusion-nano-2-1", torch_dtype=torch.float16).to("cuda")
pipeline.load_lora_weights("/root/autodl-tmp/Proj/city_demo/checkpoint-15000", weight_name="pytorch_lora_weights.safetensors")
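# NOTE: the LoRA checkpoint path above is specific to the original training machine;
# point load_lora_weights at your own checkpoint directory before running.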

def generate_image(text, option):
    num_steps = 50
    interval = num_steps // 10
    output_dir = "./intermediate_images"
    # callback = SaveImageEveryNStepsCallback(output_dir, num_steps, interval)
    # generator = torch.manual_seed(42)
    # image = pipeline(text, num_inference_steps=num_steps, generator=generator, callback_on_step_end=decode_tensors,
    #                  callback_on_step_end_tensor_inputs=["latents"])
    # Keep sampling until the generated image meets the selected ratio requirement
    # (note: there is no retry limit).
    while True:
        image = pipeline(text, num_inference_steps=num_steps)
        final_image = image.images[0]
        if option == "Ratio < 5":
            if calculate_building_ratio(final_image) < 5:
                final_pil_image = final_image.convert('L')
                return final_pil_image
        else:
            if calculate_building_ratio(final_image) >= 5:
                final_pil_image = final_image.convert('L')
                return final_pil_image
        # final_pil_image = Image.fromarray((final_image.cpu().numpy() * 255).astype('uint8'))

    # Optionally save the final image as JPEG:
    # image_path = os.path.join(output_dir, "final_image.jpg")
    # final_pil_image.save(image_path, "JPEG")
            
# def generate_image(text):
#     # Directly specify the image path
#     image_path = '/root/autodl-tmp/Proj/city_diffusion_demo/images/beijing/beijing_0.png'

#     # Load the image
#     image = Image.open(image_path)

#     return image

def check_requirements(image, requirement):
    # Check the image against the selected requirement.
    # The checks below are placeholders and need to be implemented
    # according to the actual requirements.
    if requirement == "Option 1":
        # Check condition 1
        pass
    elif requirement == "Option 2":
        # Check condition 2
        pass
    return True  # Placeholder: always returns True

def generate_compliant_image(text, requirements):
    # Regenerate until the image passes check_requirements
    while True:
        image = generate_image(text, requirements)
        if check_requirements(image, requirements):
            break
    return image
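
# Rough floor-area-ratio heuristic: non-white pixels are treated as building
# footprint, their grayscale values as building heights, and every 3 m of height
# as one floor.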
def calculate_building_ratio(image):
    # Load the image and convert it to grayscale
    # img = Image.open(image_path).convert('L')
    img_array = np.array(image.convert('L'))

    # The building footprint is defined as all non-white (value != 255) pixels
    building_area = np.count_nonzero(img_array != 255)

    # # Find the highest and lowest pixel rows containing buildings, to estimate the building height
    # non_zero_rows = np.nonzero(img_array)[0]
    # if non_zero_rows.size == 0:
    #     return 0  # Return 0 if there are no buildings
    # min_row, max_row = np.min(non_zero_rows), np.max(non_zero_rows)
    # height = max_row - min_row + 1
    height = np.sum(img_array[img_array != 255])
    print(height)
    print(img_array[img_array != 255])
    # Estimate the number of floors, assuming each floor is 3 meters high
    floors = height / 3

    # The total plot area is the area of the whole image
    total_area = img_array.size

    # Compute the ratio: building footprint * number of floors / total plot area
    ratio = (floors) / total_area
    print(ratio)
    return ratio / 10



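# Gradio UI: a prompt textbox plus a dropdown selecting the ratio requirement;
# generate_image keeps sampling until the generated image satisfies it.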
iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=["Ratio < 5", "Ratio >= 5"], label="Select Ratio Requirement")
    ],
    outputs="image",
    title="Image of Buildings Generation",
    description="Enter text and specify requirements for the generated image. The image will be regenerated until it meets the requirements."
)

iface.launch(share=True)