ford442 committed on
Commit
e3da6fc
·
verified ·
1 Parent(s): 803fc0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -8
app.py CHANGED
@@ -53,8 +53,8 @@ torch_dtype = torch.bfloat16
53
 
54
  checkpoint = "microsoft/Phi-3.5-mini-instruct"
55
  #vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
56
- #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
57
- vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
58
 
59
  pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
60
  #pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
@@ -131,15 +131,20 @@ def infer(
131
  user_prompt_rewrite = (
132
  "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
133
  )
 
 
 
134
  input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
 
135
  print("-- got prompt --")
136
  # Encode the input text and include the attention mask
137
- encoded_inputs = tokenizer(
138
- input_text, return_tensors="pt", return_attention_mask=True
139
- )
140
  # Ensure all values are on the correct device
141
  input_ids = encoded_inputs["input_ids"].to(device)
 
142
  attention_mask = encoded_inputs["attention_mask"].to(device)
 
143
  print("-- tokenize prompt --")
144
  # Google T5
145
  #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
@@ -151,12 +156,24 @@ def infer(
151
  top_p=0.9,
152
  do_sample=True,
153
  )
 
 
 
 
 
 
 
 
154
  # Use the encoded tensor 'text_inputs' here
155
  enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
156
  print('-- generated prompt --')
157
  enhanced_prompt = filter_text(enhanced_prompt,prompt)
 
158
  print('-- filtered prompt --')
159
  print(enhanced_prompt)
 
 
160
  if latent_file: # Check if a latent file is provided
161
  # initial_latents = pipe.prepare_latents(
162
  # batch_size=1,
@@ -188,7 +205,7 @@ def infer(
188
  with torch.no_grad():
189
  sd_image = pipe(
190
  prompt=enhanced_prompt, # This conversion is fine
191
- prompt_2=prompt,
192
  prompt_3=prompt,
193
  negative_prompt=negative_prompt,
194
  guidance_scale=guidance_scale,
@@ -213,13 +230,13 @@ def infer(
213
  upload_to_ftp(latent_path)
214
  #refiner.scheduler.set_timesteps(num_inference_steps,device)
215
  refine = refiner(
216
- prompt=f"{prompt}, high quality masterpiece, complex details",
217
  negative_prompt = negative_prompt,
218
  guidance_scale=7.5,
219
  num_inference_steps=num_inference_steps,
220
  image=sd_image,
221
  generator=generator,
222
- ).images[0]
223
  refine_path = f"sd35m_refine_{seed}.png"
224
  refine.save(refine_path,optimize=False,compress_level=0)
225
  upload_to_ftp(refine_path)
 
53
 
54
  checkpoint = "microsoft/Phi-3.5-mini-instruct"
55
  #vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
56
+ vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
57
+ #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
58
 
59
  pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16", torch_dtype=torch.bfloat16).to(torch.device("cuda:0"))
60
  #pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
 
131
  user_prompt_rewrite = (
132
  "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
133
  )
134
+ user_prompt_rewrite_2 = (
135
+ "Rephrase this scene to have more elaborate details: "
136
+ )
137
  input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
138
+ input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
139
  print("-- got prompt --")
140
  # Encode the input text and include the attention mask
141
+ encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
142
+ encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
 
143
  # Ensure all values are on the correct device
144
  input_ids = encoded_inputs["input_ids"].to(device)
145
+ input_ids_2 = encoded_inputs_2["input_ids"].to(device)
146
  attention_mask = encoded_inputs["attention_mask"].to(device)
147
+ attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
148
  print("-- tokenize prompt --")
149
  # Google T5
150
  #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
 
156
  top_p=0.9,
157
  do_sample=True,
158
  )
159
+ outputs_2 = model.generate(
160
+ input_ids=input_ids_2,
161
+ attention_mask=attention_mask_2,
162
+ max_new_tokens=65,
163
+ temperature=0.2,
164
+ top_p=0.9,
165
+ do_sample=True,
166
+ )
167
  # Use the encoded tensor 'text_inputs' here
168
  enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
169
+ enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
170
  print('-- generated prompt --')
171
  enhanced_prompt = filter_text(enhanced_prompt,prompt)
172
+ enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
173
  print('-- filtered prompt --')
174
  print(enhanced_prompt)
175
+ print('-- filtered prompt 2 --')
176
+ print(enhanced_prompt_2)
177
  if latent_file: # Check if a latent file is provided
178
  # initial_latents = pipe.prepare_latents(
179
  # batch_size=1,
 
205
  with torch.no_grad():
206
  sd_image = pipe(
207
  prompt=enhanced_prompt, # This conversion is fine
208
+ prompt_2=enhanced_prompt_2,
209
  prompt_3=prompt,
210
  negative_prompt=negative_prompt,
211
  guidance_scale=guidance_scale,
 
230
  upload_to_ftp(latent_path)
231
  #refiner.scheduler.set_timesteps(num_inference_steps,device)
232
  refine = refiner(
233
+ prompt=f"{enhanced_prompt_2}, high quality masterpiece, complex details",
234
  negative_prompt = negative_prompt,
235
  guidance_scale=7.5,
236
  num_inference_steps=num_inference_steps,
237
  image=sd_image,
238
  generator=generator,
239
+ ).images[0]
240
  refine_path = f"sd35m_refine_{seed}.png"
241
  refine.save(refine_path,optimize=False,compress_level=0)
242
  upload_to_ftp(refine_path)