1inkusFace committed
Commit 209a990 · verified · 1 Parent(s): 0ab4dee

Update pipeline_stable_diffusion_3_ipa.py

Files changed (1):
  1. pipeline_stable_diffusion_3_ipa.py +10 -6
pipeline_stable_diffusion_3_ipa.py CHANGED
@@ -1215,24 +1215,28 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
         print('shape 2: ', average_image_embed.shape)
         average_image_embedf = torch.mean(stacked_image_embeds, dim=1).unsqueeze(0) # Add batch dimension after averaging
         print('shape 2a: ', average_image_embedf.shape)
-        average_image_embede = torch.mean(stacked_image_embeds, dim=1) # Add batch dimension after averaging
-        print('shape 2b: ', average_image_embede.shape)
+
         # 3. Create a tensor of zeros with the same shape as the averaged embedding
         zeros_tensor = torch.zeros_like(average_image_embed)
         print('shape 3: ', zeros_tensor.shape)
+        zeros_tensor = torch.zeros_like(average_image_embed)
+        zeros_tensora = average_image_embed.repeat(1, 1, 1)
+        print('shape 3.1: ', clip_image_embedsa.shape)
+        clip_image_embedsa = average_image_embed.repeat(1, 1, 1)
+        print('shape 3.5: ', clip_image_embedsa.shape)
+        clip_image_embedse = torch.cat([zeros_tensora, average_image_embeda], dim=0)
+        print('shape 3.8: ', clip_image_embedse.shape)
         # 4. Concatenate the zeros and the average embedding
         clip_image_embeds = torch.cat([zeros_tensor, average_image_embed], dim=0)
-        clip_image_embeds2 = torch.cat([zeros_tensor, average_image_embede], dim=0)
         print('shape 4: ', clip_image_embeds.shape)
         print('shape 4a: ', clip_image_embeds2.shape)
+        clip_image_embeds2 = torch.cat([zeros_tensor, average_image_embede], dim=0)
         '''
         #clip_image_embeds = clip_image_embeds.unsqueeze(0) # Add a dimension at the beginning so now you have [1, 2*seq_len_img, embed_dim_img]
         print('shape 5: ', clip_image_embeds.shape)

         bs_embed, seq_len, _ = clip_image_embeds.shape
-        clip_image_embeds = clip_image_embeds.repeat(1, 1, 1)
-        print('shape 6: ', clip_image_embeds.shape)
-
+
         clip_image_embedsa = clip_image_embeds.view(bs_embed, 1, -1)
         print('shape 7: ', clip_image_embedsa.shape)
         clip_image_embedsb = clip_image_embeds.view(seq_len, -1)
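
For context, the pattern this hunk keeps probing with shape prints — average a stack of per-image embeddings, then concatenate a zero tensor in front as the unconditional half of a classifier-free-guidance batch — can be reproduced in isolation. The sketch below is a minimal, self-contained approximation; the tensor shapes, the averaging axis, and the example values are assumptions for illustration, not values taken from the pipeline.

import torch

# Assumed CLIP-vision-like shapes; purely illustrative.
num_images, seq_len, embed_dim = 3, 257, 1280
image_embeds = [torch.randn(seq_len, embed_dim) for _ in range(num_images)]

# Stack to [num_images, seq_len, embed_dim], average over the image axis,
# and keep a leading batch dimension (mirrors torch.mean(...).unsqueeze(0) above).
stacked_image_embeds = torch.stack(image_embeds, dim=0)
average_image_embed = stacked_image_embeds.mean(dim=0, keepdim=True)  # [1, seq_len, embed_dim]

# A zero tensor of the same shape acts as the negative (unconditional) image embedding.
zeros_tensor = torch.zeros_like(average_image_embed)

# Concatenate along the batch axis: index 0 = unconditional, index 1 = conditional.
clip_image_embeds = torch.cat([zeros_tensor, average_image_embed], dim=0)
print(clip_image_embeds.shape)  # torch.Size([2, 257, 1280])

Using a zeros_like tensor for the negative branch follows the common IP-Adapter convention of an all-zero image embedding for the unconditional pass; whether this pipeline keeps exactly that layout is not settled by this commit alone.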