1inkusFace committed on
Commit
e718d46
·
verified ·
1 Parent(s): 1023f95

Update pipeline_stable_diffusion_3_ipa.py

Browse files
Files changed (1) hide show
  1. pipeline_stable_diffusion_3_ipa.py +2 -20
pipeline_stable_diffusion_3_ipa.py CHANGED
@@ -1177,30 +1177,12 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
1177
  # Concatenate the image embeddings
1178
  concatenated_embeds = torch.cat(image_prompt_embeds_list, dim=1) # Concatenate along dimension 1
1179
 
1180
- # Create a linear layer
1181
- embedding_dim = concatenated_embeds.shape[-1] # Get the embedding dimension
1182
- num_images = len(image_prompt_embeds_list)
1183
- input_dim = num_images * embedding_dim
1184
- linear_layer = nn.Linear(input_dim, embedding_dim, dtype=torch.bfloat16).to(device)
1185
- # Move `concatenated_embeds` to the GPU if it's on the CPU
1186
- '''
1187
- if concatenated_embeds.device.type == 'cpu':
1188
- concatenated_embeds = concatenated_embeds.to('cuda')
1189
-
1190
- # Move the `linear_layer` weights and biases to the GPU if they're on the CPU
1191
- if linear_layer.weight.device.type == 'cpu':
1192
- linear_layer.to('cuda')
1193
- '''
1194
- bs_embed, seq_len, _ = concatenated_embeds.shape
1195
- # batch_size = concatenated_embeds.shape[0]
1196
- concatenated_embeds = concatenated_embeds.repeat(1, 1, 1)
1197
- concatenated_embeds = concatenated_embeds.view(bs_embed, seq_len, -1)
1198
- # Pass the concatenated embeddings through the linear layer
1199
  clip_image_embeds = linear_layer(concatenated_embeds)
1200
 
1201
  # Add a ReLU activation for non-linearity (optional)
1202
  #combined_embeds = torch.relu(combined_embeds)
1203
- #clip_image_embeds = combined_embeds #torch.cat(image_prompt_embeds_list).mean(dim=0).unsqueeze(0)
1204
 
1205
  # 4. Prepare timesteps
1206
  timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
 
1177
  # Concatenate the image embeddings
1178
  concatenated_embeds = torch.cat(image_prompt_embeds_list, dim=1) # Concatenate along dimension 1
1179
 
1180
+ batch_size, total_embedding_dim = concatenated_embeds.shape
1181
+ linear_layer = nn.Linear(total_embedding_dim, self.embedding_dim, dtype=self.dtype).to(self.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1182
  clip_image_embeds = linear_layer(concatenated_embeds)
1183
 
1184
  # Add a ReLU activation for non-linearity (optional)
1185
  #combined_embeds = torch.relu(combined_embeds)
 
1186
 
1187
  # 4. Prepare timesteps
1188
  timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)