Spaces: Running on Zero
Commit · 25bf19b
1 Parent(s): db851e8
yes

app.py CHANGED
@@ -111,34 +111,16 @@ def encode_sdxl_prompt(prompt, negative_prompt=""):
     clip_l_embeds = pipe.text_encoder(tokens_l)[0]
     neg_clip_l_embeds = pipe.text_encoder(neg_tokens_l)[0]
 
-    # CLIP-G embeddings (1280d)
+    # CLIP-G embeddings (1280d) - [0] is pooled, [1] is sequence (opposite of CLIP-L)
     clip_g_output = pipe.text_encoder_2(tokens_g)
-
-    print(f"CLIP-G output length: {len(clip_g_output) if hasattr(clip_g_output, '__len__') else 'no len'}")
-    if hasattr(clip_g_output, '__len__') and len(clip_g_output) > 0:
-        print(f"CLIP-G [0] shape: {clip_g_output[0].shape}")
-        if len(clip_g_output) > 1:
-            print(f"CLIP-G [1] shape: {clip_g_output[1].shape}")
+    clip_g_embeds = clip_g_output[1]  # sequence embeddings
 
-    # Try different ways to get the sequence embeddings
-    if hasattr(clip_g_output, 'last_hidden_state'):
-        clip_g_embeds = clip_g_output.last_hidden_state
-    elif hasattr(clip_g_output, '__len__') and len(clip_g_output) > 0:
-        clip_g_embeds = clip_g_output[0]
-    else:
-        clip_g_embeds = clip_g_output
-
     neg_clip_g_output = pipe.text_encoder_2(neg_tokens_g)
-    if hasattr(neg_clip_g_output, 'last_hidden_state'):
-        neg_clip_g_embeds = neg_clip_g_output.last_hidden_state
-    elif hasattr(neg_clip_g_output, '__len__') and len(neg_clip_g_output) > 0:
-        neg_clip_g_embeds = neg_clip_g_output[0]
-    else:
-        neg_clip_g_embeds = neg_clip_g_output
+    neg_clip_g_embeds = neg_clip_g_output[1]  # sequence embeddings
 
     # Pooled embeddings for SDXL
-    pooled_embeds = clip_g_output[
-    neg_pooled_embeds = neg_clip_g_output[
+    pooled_embeds = clip_g_output[0]  # pooled embeddings
+    neg_pooled_embeds = neg_clip_g_output[0]  # pooled embeddings
 
     return {
         "clip_l": clip_l_embeds,
@@ -233,6 +215,7 @@ def infer(prompt, negative_prompt, adapter_l_file, adapter_g_file, strength, noi
         guidance_scale=cfg_scale,
         width=width,
         height=height,
+        num_images_per_prompt=1,  # Explicitly set this
         generator=torch.Generator(device=device).manual_seed(seed) if seed != -1 else None
     ).images[0]
 
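
The second hunk pins num_images_per_prompt=1 and keeps the seed handling: a fresh torch.Generator created on the same device as the pipeline gives reproducible sampling for a fixed seed, while passing generator=None leaves sampling nondeterministic. A small sketch of that convention, assuming the app's seed == -1 means "random"; make_generator is a hypothetical helper, not part of the commit:

import torch

def make_generator(seed: int, device: str = "cuda"):
    # -1 follows the app's convention for "use a random seed"
    if seed == -1:
        return None
    return torch.Generator(device=device).manual_seed(seed)

# Hypothetical usage mirroring the pipeline call in infer():
# image = pipe(prompt, num_images_per_prompt=1,
#              generator=make_generator(1234)).images[0]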