Update pipeline_stable_diffusion_3_ipa.py
pipeline_stable_diffusion_3_ipa.py CHANGED
@@ -1177,30 +1177,12 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
         # Concatenate the image embeddings
         concatenated_embeds = torch.cat(image_prompt_embeds_list, dim=1) # Concatenate along dimension 1
 
-
-
-        num_images = len(image_prompt_embeds_list)
-        input_dim = num_images * embedding_dim
-        linear_layer = nn.Linear(input_dim, embedding_dim, dtype=torch.bfloat16).to(device)
-        # Move `concatenated_embeds` to the GPU if it's on the CPU
-        '''
-        if concatenated_embeds.device.type == 'cpu':
-            concatenated_embeds = concatenated_embeds.to('cuda')
-
-        # Move the `linear_layer` weights and biases to the GPU if they're on the CPU
-        if linear_layer.weight.device.type == 'cpu':
-            linear_layer.to('cuda')
-        '''
-        bs_embed, seq_len, _ = concatenated_embeds.shape
-        # batch_size = concatenated_embeds.shape[0]
-        concatenated_embeds = concatenated_embeds.repeat(1, 1, 1)
-        concatenated_embeds = concatenated_embeds.view(bs_embed, seq_len, -1)
-        # Pass the concatenated embeddings through the linear layer
+        batch_size, total_embedding_dim = concatenated_embeds.shape
+        linear_layer = nn.Linear(total_embedding_dim, self.embedding_dim, dtype=self.dtype).to(self.device)
         clip_image_embeds = linear_layer(concatenated_embeds)
 
         # Add a ReLU activation for non-linearity (optional)
         #combined_embeds = torch.relu(combined_embeds)
-        #clip_image_embeds = combined_embeds #torch.cat(image_prompt_embeds_list).mean(dim=0).unsqueeze(0)
 
         # 4. Prepare timesteps
         timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
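In short, the old hunk rebuilt the projection from num_images * embedding_dim with a hard-coded torch.bfloat16 dtype and commented-out CPU-to-GPU moves; the new hunk sizes the layer from the concatenated tensor's actual last dimension and takes dtype and device from the pipeline itself. A minimal standalone sketch of the revised step follows; it assumes each entry of image_prompt_embeds_list is a 2-D (batch, embedding_dim) tensor (which is what the two-way shape unpack in the new code implies) and uses a placeholder embedding_dim in place of the pipeline's self.embedding_dim:

import torch
import torch.nn as nn

# Placeholder values; the real pipeline reads these from self.embedding_dim,
# self.dtype, and self.device.
embedding_dim = 1280
image_prompt_embeds_list = [torch.randn(2, embedding_dim) for _ in range(3)]

# Concatenating 2-D embeddings along dim=1 gives (batch, num_images * embedding_dim).
concatenated_embeds = torch.cat(image_prompt_embeds_list, dim=1)

# Size the projection from the tensor's actual shape rather than
# recomputing num_images * embedding_dim by hand.
batch_size, total_embedding_dim = concatenated_embeds.shape
linear_layer = nn.Linear(total_embedding_dim, embedding_dim)

clip_image_embeds = linear_layer(concatenated_embeds)  # (batch, embedding_dim)
assert clip_image_embeds.shape == (batch_size, embedding_dim)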