Update pipeline_stable_diffusion_3_ipa.py
pipeline_stable_diffusion_3_ipa.py  CHANGED
@@ -1206,21 +1206,24 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
 
         # 1. Stack the image embeddings
         stacked_image_embeds = torch.stack(image_prompt_embeds_list)
-
+        print('shape 1: ', stacked_image_embeds.shape)
         # 2. Calculate the mean of the stacked embeddings
         average_image_embed = torch.mean(stacked_image_embeds, dim=0) #.unsqueeze(0) # Add batch dimension after averaging
-
+        print('shape 2: ', average_image_embed.shape)
         # 3. Create a tensor of zeros with the same shape as the averaged embedding
         zeros_tensor = torch.zeros_like(average_image_embed)
-
+        print('shape 3: ', zeros_tensor.shape)
         # 4. Concatenate the zeros and the average embedding
         clip_image_embeds = torch.cat([zeros_tensor, average_image_embed], dim=0)
+        print('shape 4: ', clip_image_embeds.shape)
+        clip_image_embeds = clip_image_embeds.unsqueeze(0) # Add a dimension at the beginning so now you have [1, 2*seq_len_img, embed_dim_img]
+        print('shape 5: ', clip_image_embeds.shape)
 
         bs_embed, seq_len, _ = clip_image_embeds.shape
         clip_image_embeds = clip_image_embeds.repeat(1, 1, 1)
-
-        clip_image_embeds = clip_image_embeds.view(
-
+        print('shape 6: ', clip_image_embeds.shape)
+        clip_image_embeds = clip_image_embeds.view(bs_embed, 1, -1)
+        print('shape 7: ', clip_image_embeds.shape)
         #clip_image_embeds = torch.cat([torch.stack(image_prompt_embeds_list)], dim=0).mean(dim=0)
 
         # 4. Prepare timesteps
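For reference, a minimal standalone sketch of the shape flow these print statements trace, assuming each entry in image_prompt_embeds_list is a 2-D [seq_len, embed_dim] tensor as the in-code comment suggests; the sizes (3 reference images, seq_len=64, embed_dim=1024) are made up for illustration and are not taken from the pipeline:

# Standalone sketch, not part of the pipeline: reproduces the shape flow above
# with dummy tensors. All sizes here are assumptions.
import torch

image_prompt_embeds_list = [torch.randn(64, 1024) for _ in range(3)]

stacked_image_embeds = torch.stack(image_prompt_embeds_list)               # shape 1: [3, 64, 1024]
average_image_embed = stacked_image_embeds.mean(dim=0)                     # shape 2: [64, 1024]
zeros_tensor = torch.zeros_like(average_image_embed)                       # shape 3: [64, 1024] (unconditional half)
clip_image_embeds = torch.cat([zeros_tensor, average_image_embed], dim=0)  # shape 4: [128, 1024]
clip_image_embeds = clip_image_embeds.unsqueeze(0)                         # shape 5: [1, 128, 1024] (the added line)

bs_embed, seq_len, _ = clip_image_embeds.shape                             # unpacking needs a 3-D tensor
clip_image_embeds = clip_image_embeds.repeat(1, 1, 1)                      # shape 6: [1, 128, 1024]
clip_image_embeds = clip_image_embeds.view(bs_embed, 1, -1)                # shape 7: [1, 1, 131072]

Under that assumption, the added unsqueeze(0) is what makes the rest of the block work: without it, clip_image_embeds stays 2-D after the concatenation, so the bs_embed, seq_len, _ unpacking raises a ValueError before the repeat and view are reached.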