Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,14 +1,10 @@
 import spaces
 import gradio as gr
 import numpy as np
-#import tensorrt as trt
 import random
 import torch
-from diffusers import StableDiffusion3Pipeline
-from transformers import
-#from threading import Thread
-#from transformers import pipeline
-from transformers import T5Tokenizer, T5ForConditionalGeneration
+from diffusers import StableDiffusion3Pipeline
+#from transformers import CLIPTextModelWithProjection, T5EncoderModel
 import re
 import paramiko
 import urllib
@@ -16,6 +12,8 @@ import time
 import os
 from image_gen_aux import UpscaleWithModel
 from huggingface_hub import hf_hub_download
+
+#from diffusers import SD3Transformer2DModel, AutoencoderKL
 #from models.transformer_sd3 import SD3Transformer2DModel
 #from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
 from PIL import Image
@@ -25,13 +23,13 @@ FTP_USER = "ford442"
 FTP_PASS = "GoogleBez12!"
 FTP_DIR = "1ink.us/stable_diff/" # Remote directory on FTP server
 
-torch.backends.cuda.matmul.allow_tf32 = False
+#torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
-torch.backends.cudnn.allow_tf32 = False
+#torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
 #torch.backends.cudnn.benchmark = False
-torch.backends.cuda.preferred_blas_library="cublas"
+#torch.backends.cuda.preferred_blas_library="cublas"
 #torch.backends.cuda.preferred_linalg_library="cusolver"
 
 hftoken = os.getenv("HF_AUTH_TOKEN")
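Note: this hunk comments out the explicit TF32 and BLAS-backend overrides and keeps only the reduced-precision reduction flags. A minimal sketch of what the surviving flags control (the final print is illustrative, not part of app.py):

import torch

# Disallow low-precision accumulation of partial sums in bf16/fp16 matmuls,
# trading some GEMM throughput for numerical accuracy.
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False

# With the explicit overrides commented out, TF32 behavior falls back to the
# PyTorch defaults, which can be inspected at runtime:
print(torch.backends.cuda.matmul.allow_tf32, torch.backends.cudnn.allow_tf32)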
@@ -56,57 +54,26 @@ def upload_to_ftp(filename):
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 torch_dtype = torch.bfloat16
 
-#checkpoint = "microsoft/Phi-3.5-mini-instruct"
-#vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
-#vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
-#vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
-#vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False) #, device_map='cpu') #.to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
-
 pipe = StableDiffusion3Pipeline.from_pretrained(
-    #"stabilityai
+    #"stabilityai # stable-diffusion-3.5-large",
     "ford442/stable-diffusion-3.5-large-bf16",
-    # vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-
-
-
+    # vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
+    #scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
+    # text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
     # text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
     # text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
+    tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True)
     token=True,
+    torch_dtype=torch.bfloat16,
     #use_safetensors=False,
 )
 
-pipe.to(device=device, dtype=torch.bfloat16)
-
-#pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
-#pipe = StableDiffusion3Pipeline.from_pretrained("ford442/RealVis_Medium_1.0b_bf16", torch_dtype=torch.bfloat16)
-#pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium", token=hftoken, torch_dtype=torch.float32, device_map='balanced')
-
-# pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
-
-#pipe.scheduler.config.requires_aesthetics_score = False
-#pipe.enable_model_cpu_offload()
-#pipe.to(device)
 #pipe.to(device=device, dtype=torch.bfloat16)
-#pipe = torch.compile(pipe)
-# pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, beta_schedule="scaled_linear")
-
-#refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("ford442/stable-diffusion-xl-refiner-1.0-bf16",vae = vaeXL, requires_aesthetics_score=True) #.to(torch.bfloat16)
-#refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", vae=vae, torch_dtype=torch.float32, requires_aesthetics_score=True, device_map='balanced')
-#refiner.scheduler=EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config)
-#refiner.enable_model_cpu_offload()
 
+#pipe.enable_model_cpu_offload()
+pipe.to(device)
 #pipe.to(device=device, dtype=torch.bfloat16)
 
-#refiner.scheduler.config.requires_aesthetics_score=False
-#refiner.to(device)
-#refiner = torch.compile(refiner)
-#refiner.scheduler = EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config, beta_schedule="scaled_linear")
-#refiner.scheduler = EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config)
-
-#tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=True)
-#tokenizer.tokenizer_legacy=False
-#model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
-#model = torch.compile(model)
-
 upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
 
 def filter_text(text,phraseC):
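As committed, this from_pretrained call appears broken: the new tokenizer=CLIPTokenizer.from_pretrained(...) argument has no trailing comma before token=True, and CLIPTokenizer is never imported (the transformers import at the top of the file is commented out). A corrected sketch under those assumptions, not the commit itself:

import torch
from transformers import CLIPTokenizer
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "ford442/stable-diffusion-3.5-large-bf16",
    tokenizer=CLIPTokenizer.from_pretrained(
        "ford442/stable-diffusion-3.5-large-bf16",
        add_prefix_space=True,
        subfolder="tokenizer",
        token=True,
    ),  # note the trailing comma the commit omits
    token=True,
    torch_dtype=torch.bfloat16,
)
pipe.to("cuda")  # the commit's pipe.to(device), with device resolved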
@@ -153,62 +120,10 @@ def infer(
     torch.set_float32_matmul_precision("highest")
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-
-    if expanded:
-        system_prompt_rewrite = (
-            "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
-        )
-        user_prompt_rewrite = (
-            "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
-        )
-        user_prompt_rewrite_2 = (
-            "Rephrase this scene to have more elaborate details: "
-        )
-        input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
-        input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
-        print("-- got prompt --")
-        # Encode the input text and include the attention mask
-        encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
-        encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
-        # Ensure all values are on the correct device
-        input_ids = encoded_inputs["input_ids"].to(device)
-        input_ids_2 = encoded_inputs_2["input_ids"].to(device)
-        attention_mask = encoded_inputs["attention_mask"].to(device)
-        attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
-        print("-- tokenize prompt --")
-        # Google T5
-        #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
-        outputs = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            max_new_tokens=512,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-        )
-        outputs_2 = model.generate(
-            input_ids=input_ids_2,
-            attention_mask=attention_mask_2,
-            max_new_tokens=65,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-        )
-        # Use the encoded tensor 'text_inputs' here
-        enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
-        print('-- generated prompt --')
-        enhanced_prompt = filter_text(enhanced_prompt,prompt)
-        enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
-        print('-- filtered prompt --')
-        print(enhanced_prompt)
-        print('-- filtered prompt 2 --')
-        print(enhanced_prompt_2)
-    else:
-    '''
+
     enhanced_prompt = prompt
     enhanced_prompt_2 = prompt
-
+
     if latent_file: # Check if a latent file is provided
         # initial_latents = pipe.prepare_latents(
         #     batch_size=1,
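The deleted branch was the expanded prompt-rewriting path; it depended on module-level tokenizer and model objects whose T5 setup this commit also removes, so after the change every prompt passes through unchanged. For reference, a standalone sketch of the removed rewrite step, assuming a seq2seq T5 checkpoint (the model name here is illustrative, not from the commit):

from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large").to("cuda")

def expand_prompt(prompt: str) -> str:
    # Mirrors the removed code: instruction plus user prompt, sampled decoding.
    text = ("You are an AI assistant that rewrites image prompts to be more "
            "descriptive and detailed. Rewrite this prompt to be more descriptive "
            f"and detailed and only return the rewritten text: {prompt}")
    enc = tokenizer(text, return_tensors="pt", return_attention_mask=True).to("cuda")
    out = model.generate(**enc, max_new_tokens=512, temperature=0.2,
                         top_p=0.9, do_sample=True)
    return tokenizer.decode(out[0], skip_special_tokens=True)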
@@ -263,7 +178,7 @@ def infer(
     # sd35_path = f"sd35_{seed}.png"
     # image_pil.save(sd35_path,optimize=False,compress_level=0)
     # upload_to_ftp(sd35_path)
-    sd35_path = f"
+    sd35_path = f"sd35l_{seed}.png"
     sd_image.save(sd35_path,optimize=False,compress_level=0)
     upload_to_ftp(sd35_path)
     # Convert the generated image to a tensor
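The body of upload_to_ftp is outside this diff; given the paramiko import and the FTP_* constants, a plausible SFTP-based reconstruction follows. FTP_HOST and the whole implementation are assumptions, not visible in the commit:

import os
import paramiko

def upload_to_ftp(filename):
    # Hypothetical reconstruction: SSH transport plus SFTP put into FTP_DIR.
    transport = paramiko.Transport((FTP_HOST, 22))  # FTP_HOST assumed defined elsewhere
    transport.connect(username=FTP_USER, password=FTP_PASS)
    sftp = paramiko.SFTPClient.from_transport(transport)
    sftp.put(filename, os.path.join(FTP_DIR, os.path.basename(filename)))
    sftp.close()
    transport.close()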
@@ -275,31 +190,14 @@ def infer(
     # Save the latents to a .pt file
     #torch.save(generated_latents, latent_path)
     #upload_to_ftp(latent_path)
-    #
-    '''
-    pipe.to(torch.device('cpu'))
-    refiner.to(device=device, dtype=torch.bfloat16)
-    refine = refiner(
-        prompt=f"{enhanced_prompt_2}, high quality masterpiece, complex details",
-        negative_prompt = negative_prompt_1,
-        negative_prompt_2 = negative_prompt_2,
-        guidance_scale=7.5,
-        num_inference_steps=num_inference_steps,
-        image=sd_image,
-        generator=generator,
-    ).images[0]
-    refine_path = f"sd35_refine_{seed}.png"
-    refine.save(refine_path,optimize=False,compress_level=0)
-    upload_to_ftp(refine_path)
-    refiner.to(torch.device('cpu'))
-    '''
+    # pipe.unet.to('cpu')
     upscaler_2.to(torch.device('cuda'))
     with torch.no_grad():
         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
     print('-- got upscaled image --')
-    upscaler_2.to(torch.device('cpu'))
+    #upscaler_2.to(torch.device('cpu'))
     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
-    upscale_path = f"
+    upscale_path = f"sd35l_upscale_{seed}.png"
     downscale2.save(upscale_path,optimize=False,compress_level=0)
     upload_to_ftp(upscale_path)
     return sd_image, seed, enhanced_prompt
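The surviving finishing pass runs the ClearRealityV1 upscaler in 256px tiles, then LANCZOS-downscales the result (the // 4 implies a 4x model), so it acts as a detail-cleanup filter at the original resolution rather than an enlargement. Note the commit also comments out moving upscaler_2 back to CPU, so it stays resident on the GPU between calls. A condensed sketch reusing the upscaler_2, sd_image, and seed names from app.py:

import torch
from PIL import Image

with torch.no_grad():
    # Tiled inference keeps peak VRAM bounded regardless of image size.
    upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
# Divide by the model's assumed 4x scale factor to return to the source size.
downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4), Image.LANCZOS)
downscale2.save(f"sd35l_upscale_{seed}.png", optimize=False, compress_level=0)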