Update pipeline_stable_diffusion_3_ipa.py
pipeline_stable_diffusion_3_ipa.py CHANGED
@@ -1177,30 +1177,12 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
         # Concatenate the image embeddings
         concatenated_embeds = torch.cat(image_prompt_embeds_list, dim=1) # Concatenate along dimension 1
 
-
-
-        num_images = len(image_prompt_embeds_list)
-        input_dim = num_images * embedding_dim
-        linear_layer = nn.Linear(input_dim, embedding_dim, dtype=torch.bfloat16).to(device)
-        # Move `concatenated_embeds` to the GPU if it's on the CPU
-        '''
-        if concatenated_embeds.device.type == 'cpu':
-            concatenated_embeds = concatenated_embeds.to('cuda')
-
-        # Move the `linear_layer` weights and biases to the GPU if they're on the CPU
-        if linear_layer.weight.device.type == 'cpu':
-            linear_layer.to('cuda')
-        '''
-        bs_embed, seq_len, _ = concatenated_embeds.shape
-        # batch_size = concatenated_embeds.shape[0]
-        concatenated_embeds = concatenated_embeds.repeat(1, 1, 1)
-        concatenated_embeds = concatenated_embeds.view(bs_embed, seq_len, -1)
-        # Pass the concatenated embeddings through the linear layer
+        batch_size, total_embedding_dim = concatenated_embeds.shape
+        linear_layer = nn.Linear(total_embedding_dim, self.embedding_dim, dtype=self.dtype).to(self.device)
         clip_image_embeds = linear_layer(concatenated_embeds)
 
         # Add a ReLU activation for non-linearity (optional)
         #combined_embeds = torch.relu(combined_embeds)
-        #clip_image_embeds = combined_embeds #torch.cat(image_prompt_embeds_list).mean(dim=0).unsqueeze(0)
 
         # 4. Prepare timesteps
         timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
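In short, the old hunk rebuilt the projection from num_images * embedding_dim with a hard-coded torch.bfloat16 dtype and commented-out CPU-to-GPU moves; the new hunk sizes the layer from the concatenated tensor's actual last dimension and takes dtype and device from the pipeline itself. A minimal standalone sketch of the revised step follows; it assumes each entry of image_prompt_embeds_list is a 2-D (batch, embedding_dim) tensor (which is what the two-way shape unpack in the new code implies) and uses a placeholder embedding_dim in place of the pipeline's self.embedding_dim:

import torch
import torch.nn as nn

# Placeholder values; the real pipeline reads these from self.embedding_dim,
# self.dtype, and self.device.
embedding_dim = 1280
image_prompt_embeds_list = [torch.randn(2, embedding_dim) for _ in range(3)]

# Concatenating 2-D embeddings along dim=1 gives (batch, num_images * embedding_dim).
concatenated_embeds = torch.cat(image_prompt_embeds_list, dim=1)

# Size the projection from the tensor's actual shape rather than
# recomputing num_images * embedding_dim by hand.
batch_size, total_embedding_dim = concatenated_embeds.shape
linear_layer = nn.Linear(total_embedding_dim, embedding_dim)

clip_image_embeds = linear_layer(concatenated_embeds)  # (batch, embedding_dim)
assert clip_image_embeds.shape == (batch_size, embedding_dim)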