Update app.py

app.py CHANGED
@@ -122,55 +122,59 @@ def infer(
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if expanded:
+        system_prompt_rewrite = (
+            "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
+        )
+        user_prompt_rewrite = (
+            "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
+        )
+        user_prompt_rewrite_2 = (
+            "Rephrase this scene to have more elaborate details: "
+        )
+        input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
+        input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
+        print("-- got prompt --")
+        # Encode the input text and include the attention mask
+        encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
+        encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
+        # Ensure all values are on the correct device
+        input_ids = encoded_inputs["input_ids"].to(device)
+        input_ids_2 = encoded_inputs_2["input_ids"].to(device)
+        attention_mask = encoded_inputs["attention_mask"].to(device)
+        attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
+        print("-- tokenize prompt --")
+        # Google T5
+        #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
+        outputs = model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            max_new_tokens=512,
+            temperature=0.2,
+            top_p=0.9,
+            do_sample=True,
+        )
+        outputs_2 = model.generate(
+            input_ids=input_ids_2,
+            attention_mask=attention_mask_2,
+            max_new_tokens=65,
+            temperature=0.2,
+            top_p=0.9,
+            do_sample=True,
+        )
+        # Use the encoded tensor 'text_inputs' here
+        enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
+        print('-- generated prompt --')
+        enhanced_prompt = filter_text(enhanced_prompt, prompt)
+        enhanced_prompt_2 = filter_text(enhanced_prompt_2, prompt)
+        print('-- filtered prompt --')
+        print(enhanced_prompt)
+        print('-- filtered prompt 2 --')
+        print(enhanced_prompt_2)
+    else:
+        enhanced_prompt = prompt
+        enhanced_prompt_2 = prompt
     if latent_file: # Check if a latent file is provided
         # initial_latents = pipe.prepare_latents(
         #     batch_size=1,
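For context, the new `expanded` branch routes the user prompt through a language model twice before diffusion: one long rewrite capped at 512 new tokens and one short rephrase capped at 65. Below is a minimal standalone sketch of that pattern, assuming a T5-style seq2seq rewriter; the actual `tokenizer` and `model` are loaded elsewhere in app.py, and `google/flan-t5-large` here is only a placeholder checkpoint, not the Space's real one.

```python
# Minimal sketch of the prompt-expansion step added in this commit.
# Assumed checkpoint: google/flan-t5-large (the Space loads its own model).
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large").to(device)

def expand_prompt(prompt: str, instruction: str, max_new_tokens: int = 512) -> str:
    # Tokenize with an explicit attention mask, as the commit does.
    inputs = tokenizer(f"{instruction} {prompt}",
                       return_tensors="pt",
                       return_attention_mask=True).to(device)
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        temperature=0.2,   # low temperature keeps the rewrite close to the input
        top_p=0.9,
        do_sample=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Long rewrite (512 tokens) and short rephrase (65 tokens), as in the diff.
long_rewrite = expand_prompt(
    "a cat on a beach",
    "Rewrite this prompt to be more descriptive and detailed "
    "and only return the rewritten text: ")
short_rewrite = expand_prompt(
    "a cat on a beach",
    "Rephrase this scene to have more elaborate details: ",
    max_new_tokens=65)
```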
@@ -216,13 +220,19 @@ def infer(
         max_sequence_length=512
     ).images[0]
     print('-- got image --')
-
-
-
-
-
-
-
+
+    sd35_image = pipe.vae.decode(sd_image / 0.18215).sample
+    sd35_image = sd35_image.cpu().permute(0, 2, 3, 1).float().detach().numpy()
+    sd35_image = (sd35_image * 255).round().astype("uint8")
+    image_pil = Image.fromarray(sd35_image[0])
+    sd35_path = f"tst_rv_{seed}.png"
+    image_pil.save(sd35_path, optimize=False, compress_level=0)
+    upload_to_ftp(sd35_path)
+
+
+    #sd35_path = f"sd35_{seed}.png"
+    #sd_image.save(sd35_path, optimize=False, compress_level=0)
+    #upload_to_ftp(sd35_path)
 
     # Convert the generated image to a tensor
     #generated_image_tensor = torch.tensor([np.array(sd_image).transpose(2, 0, 1)]).to('cuda') / 255.0
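One caveat on this hunk: 0.18215 is the SD 1.x VAE scaling factor, and `sd_image` above is already a decoded image (`.images[0]`), so manual VAE decoding only makes sense if the pipeline is instead called with `output_type="latent"`. A minimal sketch of the conventional diffusers decode path for SD 3.x latents under that assumption, using the factors the VAE carries in its own config:

```python
# Sketch: manually decoding SD 3.5 latents with diffusers, assuming the
# pipeline is asked for latents rather than finished PIL images.
import torch
from PIL import Image
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-medium", torch_dtype=torch.bfloat16
).to("cuda")

# With output_type="latent", .images holds the latent tensor, not PIL images.
latents = pipe("a cat on a beach", output_type="latent").images

with torch.no_grad():
    # SD 3.x VAEs store their own scaling/shift factors in the config;
    # 0.18215 is the SD 1.x value and does not apply here.
    latents = latents / pipe.vae.config.scaling_factor + pipe.vae.config.shift_factor
    decoded = pipe.vae.decode(latents).sample

array = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
array = ((array + 1) / 2).clip(0, 1)  # VAE output is in [-1, 1]
image = Image.fromarray((array[0] * 255).round().astype("uint8"))
image.save("sd35_preview.png", optimize=False, compress_level=0)
```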
@@ -293,7 +303,6 @@ def repeat_infer(
         i += 1
     return result, seed, image_path, enhanced_prompt
 
-
 with gr.Blocks(theme=gr.themes.Origin(), css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown(" # Text-to-Text-to-Image StableDiffusion 3.5 Medium (with refine)")