1inkusFace committed on
Commit
04cc156
·
verified ·
1 Parent(s): 4c32653

Update pipeline_stable_diffusion_3_ipa.py

Browse files
Files changed (1) hide show
  1. pipeline_stable_diffusion_3_ipa.py +22 -11
pipeline_stable_diffusion_3_ipa.py CHANGED
@@ -1204,7 +1204,16 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
1204
  #clip_image_embeds = torch.cat([torch.zeros_like(torch.stack(image_prompt_embeds_list)), torch.stack(image_prompt_embeds_list)], dim=0).mean(dim=0)
1205
  # FAILS clip_image_embeds = torch.cat(torch.stack(image_prompt_embeds_list), dim=0).mean(dim=0)
1206
  # FAILS TIMESTEPS clip_image_embeds = torch.cat(image_prompt_embeds_list, dim=0).mean(dim=0)
 
 
 
 
 
 
 
 
1207
 
 
1208
  # 1. Stack the image embeddings
1209
  stacked_image_embeds = torch.cat(image_prompt_embeds_list, dim=1)
1210
  print('shape 1: ', stacked_image_embeds.shape)
@@ -1214,18 +1223,20 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
1214
  # 3. Create a tensor of zeros with the same shape as the averaged embedding
1215
  zeros_tensor = torch.zeros_like(average_image_embed)
1216
  #print('shape 3: ', zeros_tensor.shape)
1217
- zeros_tensora = zeros_tensor.repeat(1, 1, 1)
1218
- print('shape 3.1: ', zeros_tensora.shape)
1219
- clip_image_embedsa = average_image_embed.repeat(1, 1, 1)
1220
- print('shape 3.5: ', clip_image_embedsa.shape)
1221
  # 4. Concatenate the zeros and the average embedding
1222
- clip_image_embeds2 = torch.cat([zeros_tensor, average_image_embed], dim=0)
1223
- clip_image_embeds3 = clip_image_embeds2.repeat(1, 1, 1)
1224
- print('shape 4: ', clip_image_embeds2.shape)
1225
- clip_image_embeds = torch.cat([zeros_tensora, clip_image_embedsa], dim=0)
1226
- print('shape 4a: ', clip_image_embeds.shape)
1227
- clip_image_embeds = torch.cat([zeros_tensora, clip_image_embedsa], dim=0)
1228
- print('shape 4b: ', clip_image_embeds3.shape)
 
 
1229
  '''
1230
  #clip_image_embeds = clip_image_embeds.unsqueeze(0) # Add a dimension at the beginning so now you have [1, 2*seq_len_img, embed_dim_img]
1231
  print('shape 5: ', clip_image_embeds.shape)
 
1204
  #clip_image_embeds = torch.cat([torch.zeros_like(torch.stack(image_prompt_embeds_list)), torch.stack(image_prompt_embeds_list)], dim=0).mean(dim=0)
1205
  # FAILS clip_image_embeds = torch.cat(torch.stack(image_prompt_embeds_list), dim=0).mean(dim=0)
1206
  # FAILS TIMESTEPS clip_image_embeds = torch.cat(image_prompt_embeds_list, dim=0).mean(dim=0)
1207
+ clip_image_embeds_cat_list = torch.cat(image_prompt_embeds_list).mean(dim=0)
1208
+ print('catted with mean shape 1: ', stacked_image_embeds.shape)
1209
+ clip_image_embeds_stack_list = torch.stack(image_prompt_embeds_list).mean(dim=0)
1210
+ print('stacked with mean shape 1: ', stacked_image_embeds.shape)
1211
+ clip_image_embeds_cat_list = torch.cat(image_prompt_embeds_list)
1212
+ print('catted without mean shape 1: ', stacked_image_embeds.shape)
1213
+ clip_image_embeds_stack_list = torch.stack(image_prompt_embeds_list)
1214
+ print('stacked without mean shape 1: ', stacked_image_embeds.shape)
1215
 
1216
+
1217
  # 1. Stack the image embeddings
1218
  stacked_image_embeds = torch.cat(image_prompt_embeds_list, dim=1)
1219
  print('shape 1: ', stacked_image_embeds.shape)
 
1223
  # 3. Create a tensor of zeros with the same shape as the averaged embedding
1224
  zeros_tensor = torch.zeros_like(average_image_embed)
1225
  #print('shape 3: ', zeros_tensor.shape)
1226
+ zeros_tensor_repeat = zeros_tensor.repeat(1, 1, 1)
1227
+ print('shape 3.1: ', zeros_tensor_repeat.shape)
1228
+ clip_image_embeds_repeat = average_image_embed.repeat(1, 1, 1)
1229
+ print('shape 3.5: ', clip_image_embeds_repeat.shape)
1230
  # 4. Concatenate the zeros and the average embedding
1231
+ clip_image_embeds_cat = torch.cat([zeros_tensor, average_image_embed], dim=0)
1232
+ print('shape 4: ', clip_image_embeds_cat.shape)
1233
+ clip_image_embeds_cat_repeat = clip_image_embeds_cat.repeat(1, 1, 1)
1234
+ print('shape 4.1: ', clip_image_embeds_cat_repeat.shape)
1235
+ clip_image_embeds_repeat_cat = torch.cat([zeros_tensor_repeat, clip_image_embeds_repeat], dim=0)
1236
+ print('shape 4a: ', clip_image_embeds_repeat_cat.shape)
1237
+ clip_image_embeds_repeat_cat_1 = torch.cat([zeros_tensor_repeat, clip_image_embeds_repeat], dim=1)
1238
+ print('shape 4b: ', clip_image_embeds_repeat_cat_1.shape)
1239
+ clip_image_embeds = clip_image_embeds_repeat_cat
1240
  '''
1241
  #clip_image_embeds = clip_image_embeds.unsqueeze(0) # Add a dimension at the beginning so now you have [1, 2*seq_len_img, embed_dim_img]
1242
  print('shape 5: ', clip_image_embeds.shape)