ford442 committed
Commit 98b3127 · verified · 1 Parent(s): 645ce84

Update app.py

Files changed (1)
  1. app.py +21 -123
app.py CHANGED
@@ -1,14 +1,10 @@
import spaces
import gradio as gr
import numpy as np
- #import tensorrt as trt
import random
import torch
- from diffusers import StableDiffusion3Pipeline, AutoencoderKL, SD3Transformer2DModel
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, CLIPTextModelWithProjection, T5EncoderModel
- #from threading import Thread
- #from transformers import pipeline
- from transformers import T5Tokenizer, T5ForConditionalGeneration
+ from diffusers import StableDiffusion3Pipeline
+ #from transformers import CLIPTextModelWithProjection, T5EncoderModel
import re
import paramiko
import urllib
@@ -16,6 +12,8 @@ import time
import os
from image_gen_aux import UpscaleWithModel
from huggingface_hub import hf_hub_download
+
+ #from diffusers import SD3Transformer2DModel, AutoencoderKL
#from models.transformer_sd3 import SD3Transformer2DModel
#from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
from PIL import Image
@@ -25,13 +23,13 @@ FTP_USER = "ford442"
FTP_PASS = "GoogleBez12!"
FTP_DIR = "1ink.us/stable_diff/" # Remote directory on FTP server

- torch.backends.cuda.matmul.allow_tf32 = False
+ #torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
- torch.backends.cudnn.allow_tf32 = False
+ #torch.backends.cudnn.allow_tf32 = False
torch.backends.cudnn.deterministic = False
#torch.backends.cudnn.benchmark = False
- torch.backends.cuda.preferred_blas_library="cublas"
+ #torch.backends.cuda.preferred_blas_library="cublas"
#torch.backends.cuda.preferred_linalg_library="cusolver"

hftoken = os.getenv("HF_AUTH_TOKEN")
@@ -56,57 +54,26 @@ def upload_to_ftp(filename):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.bfloat16

- #checkpoint = "microsoft/Phi-3.5-mini-instruct"
- #vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
- #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
- #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
- #vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False) #, device_map='cpu') #.to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
-
pipe = StableDiffusion3Pipeline.from_pretrained(
- #"stabilityai/stable-diffusion-3.5-large",
+ #"stabilityai # stable-diffusion-3.5-large",
"ford442/stable-diffusion-3.5-large-bf16",
- # vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_safetensors=True, subfolder='vae',token=True),
- # transformer=SD3Transformer2DModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='transformer',token=True),
- # text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder',token=True),
+ # vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
+ #scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
+ # text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
# text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
# text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
+ tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
token=True,
+ torch_dtype=torch.bfloat16,
#use_safetensors=False,
)

- pipe.to(device=device, dtype=torch.bfloat16)
-
- #pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
- #pipe = StableDiffusion3Pipeline.from_pretrained("ford442/RealVis_Medium_1.0b_bf16", torch_dtype=torch.bfloat16)
- #pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium", token=hftoken, torch_dtype=torch.float32, device_map='balanced')
-
- # pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
-
- #pipe.scheduler.config.requires_aesthetics_score = False
- #pipe.enable_model_cpu_offload()
- #pipe.to(device)
#pipe.to(device=device, dtype=torch.bfloat16)
- #pipe = torch.compile(pipe)
- # pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, beta_schedule="scaled_linear")
-
- #refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("ford442/stable-diffusion-xl-refiner-1.0-bf16",vae = vaeXL, requires_aesthetics_score=True) #.to(torch.bfloat16)
- #refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", vae=vae, torch_dtype=torch.float32, requires_aesthetics_score=True, device_map='balanced')
- #refiner.scheduler=EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config)
- #refiner.enable_model_cpu_offload()

+ #pipe.enable_model_cpu_offload()
+ pipe.to(device)
#pipe.to(device=device, dtype=torch.bfloat16)

- #refiner.scheduler.config.requires_aesthetics_score=False
- #refiner.to(device)
- #refiner = torch.compile(refiner)
- #refiner.scheduler = EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config, beta_schedule="scaled_linear")
- #refiner.scheduler = EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config)
-
- #tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=True)
- #tokenizer.tokenizer_legacy=False
- #model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
- #model = torch.compile(model)
-
upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))

def filter_text(text,phraseC):
@@ -153,62 +120,10 @@ def infer(
torch.set_float32_matmul_precision("highest")
seed = random.randint(0, MAX_SEED)
generator = torch.Generator(device='cuda').manual_seed(seed)
- '''
- if expanded:
- system_prompt_rewrite = (
- "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
- )
- user_prompt_rewrite = (
- "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
- )
- user_prompt_rewrite_2 = (
- "Rephrase this scene to have more elaborate details: "
- )
- input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
- input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
- print("-- got prompt --")
- # Encode the input text and include the attention mask
- encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
- encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
- # Ensure all values are on the correct device
- input_ids = encoded_inputs["input_ids"].to(device)
- input_ids_2 = encoded_inputs_2["input_ids"].to(device)
- attention_mask = encoded_inputs["attention_mask"].to(device)
- attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
- print("-- tokenize prompt --")
- # Google T5
- #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
- outputs = model.generate(
- input_ids=input_ids,
- attention_mask=attention_mask,
- max_new_tokens=512,
- temperature=0.2,
- top_p=0.9,
- do_sample=True,
- )
- outputs_2 = model.generate(
- input_ids=input_ids_2,
- attention_mask=attention_mask_2,
- max_new_tokens=65,
- temperature=0.2,
- top_p=0.9,
- do_sample=True,
- )
- # Use the encoded tensor 'text_inputs' here
- enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
- enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
- print('-- generated prompt --')
- enhanced_prompt = filter_text(enhanced_prompt,prompt)
- enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
- print('-- filtered prompt --')
- print(enhanced_prompt)
- print('-- filtered prompt 2 --')
- print(enhanced_prompt_2)
- else:
- '''
+
enhanced_prompt = prompt
enhanced_prompt_2 = prompt
- #model.to('cpu')
+
if latent_file: # Check if a latent file is provided
# initial_latents = pipe.prepare_latents(
# batch_size=1,
@@ -263,7 +178,7 @@ def infer(
# sd35_path = f"sd35_{seed}.png"
# image_pil.save(sd35_path,optimize=False,compress_level=0)
# upload_to_ftp(sd35_path)
- sd35_path = f"sd35_{seed}.png"
+ sd35_path = f"sd35l_{seed}.png"
sd_image.save(sd35_path,optimize=False,compress_level=0)
upload_to_ftp(sd35_path)
# Convert the generated image to a tensor
@@ -275,31 +190,14 @@
# Save the latents to a .pt file
#torch.save(generated_latents, latent_path)
#upload_to_ftp(latent_path)
- #refiner.scheduler.set_timesteps(num_inference_steps,device)
- '''
- pipe.to(torch.device('cpu'))
- refiner.to(device=device, dtype=torch.bfloat16)
- refine = refiner(
- prompt=f"{enhanced_prompt_2}, high quality masterpiece, complex details",
- negative_prompt = negative_prompt_1,
- negative_prompt_2 = negative_prompt_2,
- guidance_scale=7.5,
- num_inference_steps=num_inference_steps,
- image=sd_image,
- generator=generator,
- ).images[0]
- refine_path = f"sd35_refine_{seed}.png"
- refine.save(refine_path,optimize=False,compress_level=0)
- upload_to_ftp(refine_path)
- refiner.to(torch.device('cpu'))
- '''
+ # pipe.unet.to('cpu')
upscaler_2.to(torch.device('cuda'))
with torch.no_grad():
upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
print('-- got upscaled image --')
- upscaler_2.to(torch.device('cpu'))
+ #upscaler_2.to(torch.device('cpu'))
downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
- upscale_path = f"sd35_upscale_{seed}.png"
+ upscale_path = f"sd35l_upscale_{seed}.png"
downscale2.save(upscale_path,optimize=False,compress_level=0)
upload_to_ftp(upscale_path)
return sd_image, seed, enhanced_prompt
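
For reference, below is a minimal sketch of the generation path app.py lands on after this commit: load the bf16 SD3.5-large checkpoint, render one image, then run the tiled ClearReality upscale and LANCZOS downscale from infer(). The prompt, seed, step count, and guidance scale are illustrative values rather than the app's; the sketch assumes a CUDA device and access to the referenced Hub checkpoints, and it omits the diff's tokenizer override, which as committed would also need a matching CLIPTokenizer import from transformers.

import torch
from diffusers import StableDiffusion3Pipeline
from image_gen_aux import UpscaleWithModel
from PIL import Image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the bf16 repack of SD3.5-large used by the Space.
pipe = StableDiffusion3Pipeline.from_pretrained(
    "ford442/stable-diffusion-3.5-large-bf16",
    torch_dtype=torch.bfloat16,
)
pipe.to(device)

# Illustrative settings; the app draws a random seed per request.
generator = torch.Generator(device=device).manual_seed(12345)
sd_image = pipe(
    prompt="a lighthouse on a cliff at dusk, detailed oil painting",
    num_inference_steps=28,
    guidance_scale=4.5,
    generator=generator,
).images[0]

# Tiled upscale with ClearReality, then resize back down with LANCZOS,
# mirroring the post-processing path in infer().
upscaler = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(device)
with torch.no_grad():
    upscaled = upscaler(sd_image, tiling=True, tile_width=256, tile_height=256)
final = upscaled.resize((upscaled.width // 4, upscaled.height // 4), Image.LANCZOS)
final.save("sd35l_example.png")

The // 4 resize assumes ClearRealityV1 upscales by 4x, which is what the app's own post-processing implies.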
 