Update app.py

app.py CHANGED
@@ -53,8 +53,8 @@ torch_dtype = torch.bfloat16
 
 checkpoint = "microsoft/Phi-3.5-mini-instruct"
 #vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
-
-vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
+vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
+#vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
 
 pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
 #pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
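The change loads the VAE with an explicit bfloat16 dtype and moves it to the GPU at load time, matching how the pipeline itself is loaded. A minimal sketch of that pattern, assuming `torch`, `diffusers`, and a CUDA device (model IDs as in the diff):

```python
import torch
from diffusers import AutoencoderKL, StableDiffusion3Pipeline

device = torch.device("cuda:0")

# Passing torch_dtype at from_pretrained time loads the weights directly
# in bfloat16, instead of loading fp32 and casting afterwards.
vae = AutoencoderKL.from_pretrained(
    "ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16
).to(device)

pipe = StableDiffusion3Pipeline.from_pretrained(
    "ford442/stable-diffusion-3.5-medium-bf16", torch_dtype=torch.bfloat16
).to(device)

print(vae.dtype, pipe.transformer.dtype)  # both report torch.bfloat16
```

The removed line loaded the VAE without a dtype argument, which defaults to fp32 and doubles its memory footprint.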
@@ -131,15 +131,20 @@ def infer(
     user_prompt_rewrite = (
         "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
     )
+    user_prompt_rewrite_2 = (
+        "Rephrase this scene to have more elaborate details: "
+    )
     input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
+    input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
     print("-- got prompt --")
     # Encode the input text and include the attention mask
-    encoded_inputs = tokenizer(
-        input_text, return_tensors="pt", return_attention_mask=True
-    )
+    encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
+    encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
     # Ensure all values are on the correct device
     input_ids = encoded_inputs["input_ids"].to(device)
+    input_ids_2 = encoded_inputs_2["input_ids"].to(device)
     attention_mask = encoded_inputs["attention_mask"].to(device)
+    attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
     print("-- tokenize prompt --")
     # Google T5
     #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
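The tokenizer is now called with `return_attention_mask=True` and each tensor is moved to the device by hand, once per rewrite instruction. A hypothetical helper (not in app.py) that factors out the repeated pattern; `tokenizer` and `device` are the Phi-3.5 tokenizer and CUDA device the script already defines:

```python
def encode_rewrite(system_prompt, instruction, prompt, tokenizer, device):
    """Tokenize one rewrite request and return device-resident tensors."""
    text = f"{system_prompt} {instruction} {prompt}"
    enc = tokenizer(text, return_tensors="pt", return_attention_mask=True)
    return enc["input_ids"].to(device), enc["attention_mask"].to(device)

# Usage mirroring the two variants in the diff:
# input_ids, attention_mask = encode_rewrite(
#     system_prompt_rewrite, user_prompt_rewrite, prompt, tokenizer, device)
# input_ids_2, attention_mask_2 = encode_rewrite(
#     system_prompt_rewrite, user_prompt_rewrite_2, prompt, tokenizer, device)
```

Passing the attention mask explicitly also avoids the generation-time warning transformers emits when the pad and eos tokens coincide, as they do for many instruct checkpoints.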
@@ -151,12 +156,24 @@ def infer(
         top_p=0.9,
         do_sample=True,
     )
+    outputs_2 = model.generate(
+        input_ids=input_ids_2,
+        attention_mask=attention_mask_2,
+        max_new_tokens=65,
+        temperature=0.2,
+        top_p=0.9,
+        do_sample=True,
+    )
     # Use the encoded tensor 'text_inputs' here
     enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
     print('-- generated prompt --')
     enhanced_prompt = filter_text(enhanced_prompt,prompt)
+    enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
     print('-- filtered prompt --')
     print(enhanced_prompt)
+    print('-- filtered prompt 2 --')
+    print(enhanced_prompt_2)
     if latent_file: # Check if a latent file is provided
         # initial_latents = pipe.prepare_latents(
         #     batch_size=1,
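`outputs_2` comes from a second `model.generate` call identical to the first except for its inputs, and both results go through the same decode-and-filter steps. A sketch that folds the sequence into one function, assuming the `model`, `tokenizer`, and `filter_text` already defined in app.py:

```python
def rewrite(input_ids, attention_mask, original_prompt):
    # temperature/top_p only take effect because do_sample=True;
    # max_new_tokens=65 caps the length of the rewrite.
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=65,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return filter_text(text, original_prompt)

# enhanced_prompt = rewrite(input_ids, attention_mask, prompt)
# enhanced_prompt_2 = rewrite(input_ids_2, attention_mask_2, prompt)
```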
@@ -188,7 +205,7 @@ def infer(
     with torch.no_grad():
         sd_image = pipe(
             prompt=enhanced_prompt, # This conversion is fine
-            prompt_2=
+            prompt_2=enhanced_prompt_2,
             prompt_3=prompt,
             negative_prompt=negative_prompt,
             guidance_scale=guidance_scale,
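`StableDiffusion3Pipeline` exposes three prompt slots because SD3 conditions on three text encoders: in diffusers, `prompt` feeds the CLIP-L encoder, `prompt_2` the OpenCLIP bigG encoder, and `prompt_3` the T5 encoder, with unset slots falling back to `prompt`. The change routes a different rewrite to each one; variable names below are those already defined in `infer()`:

```python
sd_image = pipe(
    prompt=enhanced_prompt,      # first Phi-3.5 rewrite  -> CLIP-L
    prompt_2=enhanced_prompt_2,  # second Phi-3.5 rewrite -> OpenCLIP bigG
    prompt_3=prompt,             # original user prompt   -> T5
    negative_prompt=negative_prompt,
    guidance_scale=guidance_scale,
    # ... remaining arguments as in app.py ...
)
```

Keeping the untouched user prompt on the T5 slot preserves the literal request while the CLIP slots carry the embellished variants.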
@@ -213,13 +230,13 @@ def infer(
     upload_to_ftp(latent_path)
     #refiner.scheduler.set_timesteps(num_inference_steps,device)
     refine = refiner(
-        prompt=f"{
+        prompt=f"{enhanced_prompt_2}, high quality masterpiece, complex details",
         negative_prompt = negative_prompt,
         guidance_scale=7.5,
         num_inference_steps=num_inference_steps,
         image=sd_image,
         generator=generator,
-    ).images[0]
+    ).images[0]
     refine_path = f"sd35m_refine_{seed}.png"
     refine.save(refine_path,optimize=False,compress_level=0)
     upload_to_ftp(refine_path)
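For the refiner pass only the prompt line changed: it now uses the second rewrite plus a fixed quality suffix. The same tail with the reasoning spelled out in comments (`refiner`, `upload_to_ftp`, and the other names come from app.py):

```python
refine = refiner(
    prompt=f"{enhanced_prompt_2}, high quality masterpiece, complex details",
    negative_prompt=negative_prompt,
    guidance_scale=7.5,                       # fixed here, unlike the base pass
    num_inference_steps=num_inference_steps,
    image=sd_image,                           # img2img over the base output
    generator=generator,                      # reuse the seeded generator
).images[0]
refine_path = f"sd35m_refine_{seed}.png"
# compress_level=0 skips zlib compression: fastest save, largest file;
# PNG stays lossless at any level.
refine.save(refine_path, optimize=False, compress_level=0)
upload_to_ftp(refine_path)
```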