1inkusFace committed
Commit 7a7f101 · verified · Parent: 66813a7

Update pipeline_stable_diffusion_3_ipa.py

Files changed (1)
  1. pipeline_stable_diffusion_3_ipa.py +9 -6
pipeline_stable_diffusion_3_ipa.py CHANGED
@@ -1206,21 +1206,24 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
 
         # 1. Stack the image embeddings
         stacked_image_embeds = torch.stack(image_prompt_embeds_list)
-
+        print('shape 1: ', stacked_image_embeds.shape)
         # 2. Calculate the mean of the stacked embeddings
         average_image_embed = torch.mean(stacked_image_embeds, dim=0) #.unsqueeze(0) # Add batch dimension after averaging
-
+        print('shape 2: ', average_image_embed.shape)
         # 3. Create a tensor of zeros with the same shape as the averaged embedding
         zeros_tensor = torch.zeros_like(average_image_embed)
-
+        print('shape 3: ', zeros_tensor.shape)
         # 4. Concatenate the zeros and the average embedding
         clip_image_embeds = torch.cat([zeros_tensor, average_image_embed], dim=0)
+        print('shape 4: ', clip_image_embeds.shape)
+        clip_image_embeds = clip_image_embeds.unsqueeze(0) # Add a dimension at the beginning so now you have [1, 2*seq_len_img, embed_dim_img]
+        print('shape 5: ', clip_image_embeds.shape)
 
         bs_embed, seq_len, _ = clip_image_embeds.shape
         clip_image_embeds = clip_image_embeds.repeat(1, 1, 1)
-        clip_image_embeds = clip_image_embeds.unsqueeze(0) # Add a dimension at the beginning so now you have [1, 2*seq_len_img, embed_dim_img]
-        clip_image_embeds = clip_image_embeds.view(2, 1, -1)
-
+        print('shape 6: ', clip_image_embeds.shape)
+        clip_image_embeds = clip_image_embeds.view(bs_embed, 1, -1)
+        print('shape 7: ', clip_image_embeds.shape)
         #clip_image_embeds = torch.cat([torch.stack(image_prompt_embeds_list)], dim=0).mean(dim=0)
 
         # 4. Prepare timesteps
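For context, a minimal standalone sketch of the shape flow the new print statements trace, assuming each entry of image_prompt_embeds_list is a 2-D tensor of shape [seq_len, embed_dim] (the sizes below are made up for illustration; the real values come from the image encoder):

import torch

seq_len, embed_dim = 64, 2048  # hypothetical sizes for illustration
image_prompt_embeds_list = [torch.randn(seq_len, embed_dim) for _ in range(3)]

stacked_image_embeds = torch.stack(image_prompt_embeds_list)               # shape 1: [3, 64, 2048]
average_image_embed = torch.mean(stacked_image_embeds, dim=0)              # shape 2: [64, 2048]
zeros_tensor = torch.zeros_like(average_image_embed)                       # shape 3: [64, 2048]
clip_image_embeds = torch.cat([zeros_tensor, average_image_embed], dim=0)  # shape 4: [128, 2048], i.e. [2*seq_len, embed_dim]
clip_image_embeds = clip_image_embeds.unsqueeze(0)                         # shape 5: [1, 128, 2048]

bs_embed, seq_len, _ = clip_image_embeds.shape                             # bs_embed == 1 under this assumption
clip_image_embeds = clip_image_embeds.repeat(1, 1, 1)                      # shape 6: unchanged (no-op repeat)
clip_image_embeds = clip_image_embeds.view(bs_embed, 1, -1)                # shape 7: [1, 1, 262144]

Prepending a zeros_like tensor to the averaged embedding is the usual way to supply an unconditional image embedding for classifier-free guidance; the final view now derives its batch size from the bs_embed actually observed after the unsqueeze instead of the previous hard-coded 2, which, together with the shape prints, is what the +9/-6 change amounts to.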