ford442 committed on
Commit 2fd5bd1 · verified · 1 Parent(s): 34665aa

Create app.py

Files changed (1)
  1. app.py +298 -0
app.py ADDED
@@ -0,0 +1,298 @@
+ import spaces
+ import gradio as gr
+ import numpy as np
+
+ #import tensorrt as trt
+
+ import random
+ import torch
+ from diffusers import StableDiffusion3Pipeline, AutoencoderKL, StableDiffusionXLImg2ImgPipeline, EDMEulerScheduler, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+ from threading import Thread
+ from transformers import pipeline
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
+ import re
+ import paramiko
+ import urllib
+ import time
+ import os
+
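+ # NOTE: FTP_USER / FTP_PASS are expected in the environment (e.g. the Space's
+ # secrets store) so that credentials never land in the repository history.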
+ FTP_HOST = "1ink.us"
+ FTP_USER = os.getenv("FTP_USER", "ford442")
+ FTP_PASS = os.getenv("FTP_PASS")  # set via environment / Space secrets
+ FTP_DIR = "1ink.us/stable_diff/"  # Remote directory on FTP server
+
+ # Disable TF32 and reduced-precision reductions so matmuls run at full precision.
+ torch.backends.cuda.matmul.allow_tf32 = False
+ torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
+ torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
+ torch.backends.cudnn.allow_tf32 = False
+ torch.backends.cudnn.deterministic = False
+ torch.backends.cudnn.benchmark = False
+ # preferred_blas_library / preferred_linalg_library are setter functions, not
+ # attributes, so they must be called rather than assigned.
+ torch.backends.cuda.preferred_blas_library(backend="cublas")
+ torch.backends.cuda.preferred_linalg_library(backend="cusolver")
+
+ torch.set_float32_matmul_precision("highest")
+
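+ # Despite the FTP_* naming, uploads actually go over SFTP (SSH, port 22) via paramiko.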
+ def upload_to_ftp(filename):
+     try:
+         transport = paramiko.Transport((FTP_HOST, 22))
+         destination_path = FTP_DIR + filename
+         transport.connect(username=FTP_USER, password=FTP_PASS)
+         sftp = paramiko.SFTPClient.from_transport(transport)
+         sftp.put(filename, destination_path)
+         sftp.close()
+         transport.close()
+         print(f"Uploaded {filename} to FTP server")
+     except Exception as e:
+         print(f"FTP upload error: {e}")
+
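+ # Model stack: a bf16 SDXL VAE, the SD3.5 Large base pipeline in fp32, and a
+ # bf16 SDXL refiner, all sharded across available devices with
+ # device_map='balanced'. Phi-3.5-mini is loaded separately to expand prompts.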
+ device = torch.device("cuda")
+ torch_dtype = torch.bfloat16
+
+ checkpoint = "microsoft/Phi-3.5-mini-instruct"
+ #vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+ vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16, device_map='balanced')
+
+ #pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16", torch_dtype=torch.bfloat16, device_map='balanced')
+ pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", torch_dtype=torch.float32, device_map='balanced')
+
+ # pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
+
+ #pipe.scheduler.config.requires_aesthetics_score = False
+ #pipe.enable_model_cpu_offload()
+ #pipe.to(device)
+ #pipe = torch.compile(pipe)
+ # pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, beta_schedule="scaled_linear")
+
+ refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("ford442/stable-diffusion-xl-refiner-1.0-bf16", vae=vae, torch_dtype=torch.bfloat16, use_safetensors=True, requires_aesthetics_score=True, device_map='balanced')
+ #refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", vae=vae, torch_dtype=torch.float32, requires_aesthetics_score=True, device_map='balanced')
+
+ #refiner.enable_model_cpu_offload()
+
+ #refiner.scheduler.config.requires_aesthetics_score=False
+ #refiner.to(device)
+ #refiner = torch.compile(refiner)
+ refiner.scheduler = EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config, beta_schedule="scaled_linear")
+
+ # Tokenizers take no device_map, and the legacy switch is a from_pretrained
+ # kwarg rather than an instance attribute.
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False, legacy=False)
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map='balanced')
+ #model = torch.compile(model)
+
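+ # Example (hypothetical input): filter_text("Sure! Rewritten Prompt: a misty
+ # harbor at dawn") returns "a misty harbor at dawn"; everything up to and
+ # including the marker is stripped.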
+ def filter_text(text):
+     """Filters out the text up to and including 'Rewritten Prompt:'."""
+     pattern = r".*?Rewritten Prompt:\s*"  # Matches any characters up to 'Rewritten Prompt:'
+     filtered_text = re.sub(pattern, "", text, flags=re.DOTALL)  # Removes the matched pattern from the text
+     return filtered_text
+
+ MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 4096
+
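+ # infer() runs the full three-stage pass: Phi-3.5 expands the prompt, the
+ # SD3.5 pipeline renders it, and the SDXL refiner does an img2img polish;
+ # both the base and refined images are uploaded via upload_to_ftp().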
+ @spaces.GPU(duration=60)
+ def infer(
+     prompt,
+     negative_prompt,
+     seed,
+     randomize_seed,
+     width,
+     height,
+     guidance_scale,
+     num_inference_steps,
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     # Honor the "Randomize seed" checkbox instead of always re-rolling.
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     generator = torch.Generator(device='cpu').manual_seed(seed)
+
+     system_prompt_rewrite = (
+         "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
+     )
+     user_prompt_rewrite = (
+         "Rewrite this prompt to be more descriptive and detailed: "
+     )
+     input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
+     print("-- got prompt --")
+     # Encode the input text and include the attention mask
+     encoded_inputs = tokenizer(
+         input_text, return_tensors="pt", return_attention_mask=True
+     )
+     # Ensure all values are on the correct device
+     input_ids = encoded_inputs["input_ids"].to(device)
+     attention_mask = encoded_inputs["attention_mask"].to(device)
+     print("-- tokenize prompt --")
+     outputs = model.generate(
+         input_ids=input_ids,
+         attention_mask=attention_mask,
+         max_new_tokens=77,
+         temperature=0.2,
+         top_p=0.9,
+         do_sample=True,
+     )
+     enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     print('-- generated prompt --')
+     print(enhanced_prompt)
+     enhanced_prompt = filter_text(enhanced_prompt)
+     print('-- filtered prompt --')
+     print(enhanced_prompt)
+     print('-- generating image --')
+     sd_image = pipe(
+         prompt=enhanced_prompt,
+         negative_prompt=negative_prompt,
+         guidance_scale=guidance_scale,
+         num_inference_steps=num_inference_steps,
+         width=width,
+         height=height,
+         generator=generator
+     ).images[0]
+     print('-- got image --')
+     image_path = f"sd35m_{seed}.png"
+     sd_image.save(image_path)
+     upload_to_ftp(image_path)
+     # Second pass: run the SDXL refiner in img2img mode over the SD3.5 output.
+     refine = refiner(
+         prompt=f"{prompt}, high quality masterpiece, complex details",
+         negative_prompt=negative_prompt,
+         guidance_scale=7.5,
+         num_inference_steps=num_inference_steps,
+         image=sd_image,
+         generator=generator,
+     ).images[0]
+     refine_path = f"refine_{seed}.png"
+     refine.save(refine_path)
+     upload_to_ftp(refine_path)
+     return refine, seed, refine_path, enhanced_prompt
+
+ examples = [
+     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
+     "An astronaut riding a green horse",
+     "A delicious ceviche cheesecake slice",
+ ]
+
+ css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 640px;
+ }
+ """
+
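+ # repeat_infer() batches many infer() calls with a fixed delay between them;
+ # note it is defined here but not yet wired to any UI event below.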
+ def repeat_infer(
+     prompt,
+     negative_prompt,
+     seed,
+     randomize_seed,
+     width,
+     height,
+     guidance_scale,
+     num_inference_steps,
+     num_iterations,  # New input for number of iterations
+ ):
+     i = 0
+     while i < num_iterations:
+         time.sleep(700)  # Wait between iterations (700 seconds, roughly 11.5 minutes)
+         result, seed, image_path, enhanced_prompt = infer(
+             prompt,
+             negative_prompt,
+             seed,
+             randomize_seed,
+             width,
+             height,
+             guidance_scale,
+             num_inference_steps,
+         )
+         # Optionally, process each iteration's results here (for example,
+         # display the image or save it under a different name).
+         i += 1
+     return result, seed, image_path, enhanced_prompt
+
+
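+ # UI: a prompt box with a Run button, the rendered result, and an Advanced
+ # Settings accordion for seed, size, guidance and step count; the expanded
+ # prompt from Phi-3.5 is echoed back in its own textbox.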
+ with gr.Blocks(css=css) as demo:
+     with gr.Column(elem_id="col-container"):
+         gr.Markdown(" # Text-to-Text-to-Image StableDiffusion 3.5 Large (with refine)")
+         expanded_prompt_output = gr.Textbox(label="Expanded Prompt", lines=5)
+         gr.File(label="Latents File (optional)")  # file input for latents (currently unused)
+         with gr.Row():
+             prompt = gr.Text(
+                 label="Prompt",
+                 show_label=False,
+                 max_lines=1,
+                 placeholder="Enter your prompt",
+                 value="A captivating Christmas scene.",
+                 container=False,
+             )
+             run_button = gr.Button("Run", scale=0, variant="primary")
+         result = gr.Image(label="Result", show_label=False)
+         with gr.Accordion("Advanced Settings", open=False):
+             negative_prompt = gr.Text(
+                 label="Negative prompt",
+                 max_lines=1,
+                 placeholder="Enter a negative prompt",
+                 visible=False,
+             )
+             num_iterations = gr.Number(
+                 value=1000,
+                 label="Number of Iterations")
+             seed = gr.Slider(
+                 label="Seed",
+                 minimum=0,
+                 maximum=MAX_SEED,
+                 step=1,
+                 value=0,
+             )
+             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+             with gr.Row():
+                 width = gr.Slider(
+                     label="Width",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=768,  # Replace with defaults that work for your model
+                 )
+                 height = gr.Slider(
+                     label="Height",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=768,  # Replace with defaults that work for your model
+                 )
+             guidance_scale = gr.Slider(
+                 label="Guidance scale",
+                 minimum=0.0,
+                 maximum=10.0,
+                 step=0.1,
+                 value=5.0,  # Replace with defaults that work for your model
+             )
+             num_inference_steps = gr.Slider(
+                 label="Number of inference steps",
+                 minimum=1,
+                 maximum=500,
+                 step=1,
+                 value=75,  # Replace with defaults that work for your model
+             )
+             save_button = gr.Button("Save Image")
+             image_path_output = gr.Text(visible=False)  # Hidden component to store the path
+             save_button.click(
+                 fn=lambda image_path: None,  # No-op: the image is already saved and uploaded server-side
+                 inputs=[image_path_output],
+                 outputs=None,
+             )
+         gr.Examples(examples=examples, inputs=[prompt])
+     gr.on(
+         triggers=[run_button.click, prompt.submit],
+         fn=infer,
+         inputs=[
+             prompt,
+             negative_prompt,
+             seed,
+             randomize_seed,
+             width,
+             height,
+             guidance_scale,
+             num_inference_steps,
+         ],
+         outputs=[result, seed, image_path_output, expanded_prompt_output],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()