1inkusFace committed
Commit b2bf772 (verified) · Parent(s): 97f4409

Create app.py

Files changed (1): app.py (+306, −0)
app.py ADDED
@@ -0,0 +1,306 @@
import spaces
import gradio as gr
import numpy as np
import random
import torch
from PIL import Image
import paramiko
import os
import datetime

from models.transformer_sd3 import SD3Transformer2DModel
from transformers import T5TokenizerFast
# Use the IP-Adapter variant of the SD3 pipeline; importing the stock
# diffusers StableDiffusion3Pipeline as well would only be shadowed by this.
from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline

from image_gen_aux import UpscaleWithModel
from huggingface_hub import hf_hub_download
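
# Note: models.transformer_sd3 and pipeline_stable_diffusion_3_ipa are not pip
# packages. They are presumably local copies of the modified SD3 transformer
# and IP-Adapter pipeline from the InstantX/SD3.5-Large-IP-Adapter repository,
# and must sit next to app.py for these imports to resolve.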

FTP_HOST = '1ink.us'
FTP_USER = 'ford442'
FTP_PASS = os.getenv("FTP_PASS")
FTP_DIR = '1ink.us/stable_diff'

torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
torch.backends.cudnn.allow_tf32 = False
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
#torch.backends.cuda.preferred_blas_library="cublas"
#torch.backends.cuda.preferred_linalg_library="cusolver"
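
# A note on the flags above: with TF32 and the reduced-precision bf16/fp16
# reductions disabled, CUDA matmuls run at full precision, trading speed for
# run-to-run consistency; benchmark=False keeps cuDNN from re-tuning its conv
# algorithm choice per input shape.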

hftoken = os.getenv("HF_TOKEN")  # read but unused below; the loaders authenticate with token=True instead

image_encoder_path = "google/siglip-so400m-patch14-384"
ipadapter_path = hf_hub_download(repo_id="InstantX/SD3.5-Large-IP-Adapter", filename="ip-adapter.bin")
model_path = 'ford442/stable-diffusion-3.5-large-bf16'
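
# hf_hub_download fetches the IP-Adapter checkpoint into the local HF cache
# (once) and returns its path; image_encoder_path names the SigLIP vision
# tower that init_ipadapter() loads later to embed the reference image.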

def upload_to_ftp(filename):
    try:
        transport = paramiko.Transport((FTP_HOST, 22))
        destination_path = FTP_DIR + '/' + filename  # fixed: explicit separator; the name was previously glued onto the directory
        transport.connect(username=FTP_USER, password=FTP_PASS)
        sftp = paramiko.SFTPClient.from_transport(transport)
        sftp.put(filename, destination_path)
        sftp.close()
        transport.close()
        print(f"Uploaded {filename} to FTP server")
    except Exception as e:
        print(f"FTP upload error: {e}")
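
# Despite the FTP_* naming, this helper speaks SFTP (SSH on port 22) via
# paramiko. A hypothetical call
#     upload_to_ftp("sd35_20250101_120000.png")
# reads that file from the working directory and writes it to
# 1ink.us/stable_diff/sd35_20250101_120000.png on the remote host; failures
# are printed and swallowed so a dead link never aborts a generation.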

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.bfloat16

transformer = SD3Transformer2DModel.from_pretrained(
    model_path, subfolder="transformer", torch_dtype=torch.bfloat16
)

pipe = StableDiffusion3Pipeline.from_pretrained(
    "ford442/stable-diffusion-3.5-large-bf16",
    transformer=transformer,  # fixed: pass the IP-Adapter-aware transformer loaded above; it was previously unused
    tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_fast=True, subfolder="tokenizer_3", token=True),
    torch_dtype=torch.bfloat16,
)

pipe.to(device)

upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
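
# ClearRealityV1 appears to be a 4x super-resolution model, which matches the
# `// 4` resize in infer(): the image is upscaled, then shrunk back, so the
# net effect is detail cleanup at the original resolution.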

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 4096
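
# @spaces.GPU is the Hugging Face ZeroGPU hook: the Space holds no GPU at
# rest, and each infer() call is granted a device slice for at most the stated
# duration (90 s), which effectively caps how many inference steps fit per run.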

@spaces.GPU(duration=90)
def infer(
    prompt,
    negative_prompt_1,
    negative_prompt_2,
    negative_prompt_3,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    latent_file,  # optional reference image routed through the IP-Adapter
    ip_scale,
    progress=gr.Progress(track_tqdm=True),
):
    upscaler_2.to(torch.device('cpu'))  # park the upscaler on CPU to free VRAM for generation
    torch.set_float32_matmul_precision("highest")
    seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device='cuda').manual_seed(seed)
    enhanced_prompt = prompt
    enhanced_prompt_2 = prompt

    if latent_file:  # a reference image was provided: condition on it via the IP-Adapter
        sd_image_a = Image.open(latent_file.name).convert('RGB')
        print("-- using image file and loading ip-adapter --")
        pipe.init_ipadapter(
            ip_adapter_path=ipadapter_path,  # fixed: was the undefined name ip_adapter_path
            image_encoder_path=image_encoder_path,
            nb_token=64,
        )
        print('-- generating image --')
        sd_image = pipe(
            width=width,
            height=height,
            prompt=enhanced_prompt,
            negative_prompt=negative_prompt_1,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator,
            clip_image=sd_image_a,
            ipadapter_scale=ip_scale,
        ).images[0]
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        rv_path = f"sd35_{timestamp}.png"
        # fixed: sd_image is already a single PIL image, so no [0] indexing
        sd_image.save(rv_path, optimize=False, compress_level=0)
        upload_to_ftp(rv_path)
    else:
        print('-- generating image --')
        sd_image = pipe(
            prompt=prompt,
            prompt_2=enhanced_prompt_2,
            prompt_3=enhanced_prompt,
            negative_prompt=negative_prompt_1,
            negative_prompt_2=negative_prompt_2,
            negative_prompt_3=negative_prompt_3,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            width=width,
            height=height,
            generator=generator,
            max_sequence_length=512,
        ).images[0]
        print('-- got image --')
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        sd35_path = f"sd35l_{timestamp}.png"
        sd_image.save(sd35_path, optimize=False, compress_level=0)
        upload_to_ftp(sd35_path)

    # Upscale the result from either branch, then shrink it back to size.
    upscaler_2.to(torch.device('cuda'))
    with torch.no_grad():
        upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
    print('-- got upscaled image --')
    downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4), Image.LANCZOS)
    upscale_path = f"sd35l_upscale_{seed}.png"
    downscale2.save(upscale_path, optimize=False, compress_level=0)
    upload_to_ftp(upscale_path)
    return sd_image, enhanced_prompt
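
# The UI receives the raw (pre-upscale) image plus the prompt that produced
# it; the upscaled copy exists only on the SFTP host. The seed is drawn inside
# infer(), so identical settings still produce different images run to run.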

examples = [
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
body {
    background-color: blue;
}
"""

with gr.Blocks(theme=gr.themes.Origin(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Text-to-Text-to-Image StableDiffusion 3.5 Large")
        expanded_prompt_output = gr.Textbox(label="Prompt", lines=5)
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")
        result = gr.Image(label="Result", show_label=False)
        with gr.Accordion("Advanced Settings", open=True):
            latent_file = gr.File(label="Image File (optional)")  # reference image for the IP-Adapter
            ip_scale = gr.Slider(
                label="Image Prompt Scale",
                minimum=0.0,
                maximum=2.0,
                step=0.01,
                value=0.5,
            )
            negative_prompt_1 = gr.Text(
                label="Negative prompt 1",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=True,
                value="bad anatomy, poorly drawn hands, distorted face, blurry, out of frame, low resolution, grainy, pixelated, disfigured, mutated, extra limbs, bad composition",
            )
            negative_prompt_2 = gr.Text(
                label="Negative prompt 2",
                max_lines=1,
                placeholder="Enter a second negative prompt",
                visible=True,
                value="unrealistic, cartoon, anime, sketch, painting, drawing, illustration, graphic, digital art, render, 3d, blurry, deformed, disfigured, poorly drawn, bad anatomy, mutated, extra limbs, ugly, out of frame, bad composition, low resolution, grainy, pixelated, noisy, oversaturated, undersaturated, (worst quality, low quality:1.3), (bad hands, missing fingers:1.2)",
            )
            negative_prompt_3 = gr.Text(
                label="Negative prompt 3",
                max_lines=1,
                placeholder="Enter a third negative prompt",
                visible=True,
                value="(worst quality, low quality:1.3), (bad anatomy, bad hands, missing fingers, extra digit, fewer digits:1.2), (blurry:1.1), cropped, watermark, text, signature, logo, jpeg artifacts, (ugly, deformed, disfigured:1.2), (poorly drawn:1.2), mutated, extra limbs, (bad proportions, gross proportions:1.2), (malformed limbs, missing arms, missing legs, extra arms, extra legs:1.2), (fused fingers, too many fingers, long neck:1.2), (unnatural body, unnatural pose:1.1), out of frame, (bad composition, poorly composed:1.1), (oversaturated, undersaturated:1.1), (grainy, pixelated:1.1), (low resolution, noisy:1.1), (unrealistic, distorted:1.1), (extra fingers, mutated hands, poorly drawn hands, bad hands:1.3), (missing fingers:1.3)",
            )
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=768,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=768,
                )
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=30.0,
                step=0.1,
                value=4.2,
            )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=500,
                step=1,
                value=220,
            )
        gr.Examples(examples=examples, inputs=[prompt])
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt_1,
            negative_prompt_2,
            negative_prompt_3,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            latent_file,
            ip_scale,
        ],
        outputs=[result, expanded_prompt_output],
    )

if __name__ == "__main__":
    demo.launch()
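
# For local testing outside Spaces, `python app.py` should work: the
# @spaces.GPU decorator is designed to be a no-op off-platform, though the
# models above still need a CUDA device with enough memory for SD3.5 Large.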