Ryukijano committed (verified)
Commit 105e0dd
Parent(s): 16c45c8

Update custom_pipeline.py

Files changed (1):
  1. custom_pipeline.py +18 -19
custom_pipeline.py CHANGED
@@ -64,8 +64,8 @@ class FluxWithCFGPipeline(FluxPipeline):
         pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
+        joint_attention_kwargs: Optional[Dict[str, Any]] = None,
         max_sequence_length: int = 300,
-        generate_with_graph = None
     ):
         """Generates images and yields intermediate results during the denoising process."""
         height = height or self.default_sample_size * self.vae_scale_factor
@@ -83,6 +83,7 @@ class FluxWithCFGPipeline(FluxPipeline):
         )
 
         self._guidance_scale = guidance_scale
+        self._joint_attention_kwargs = joint_attention_kwargs
         self._interrupt = False
 
         # 2. Define call parameters
@@ -90,7 +91,7 @@ class FluxWithCFGPipeline(FluxPipeline):
         device = self._execution_device
 
         # 3. Encode prompt
-        lora_scale = None
+        lora_scale = joint_attention_kwargs.get("scale", None) if joint_attention_kwargs is not None else None
         prompt_embeds, pooled_prompt_embeds, text_ids = self.encode_prompt(
             prompt=prompt,
             prompt_2=prompt_2,
@@ -137,23 +138,21 @@ class FluxWithCFGPipeline(FluxPipeline):
 
             timestep = t.expand(latents.shape[0]).to(latents.dtype)
 
-            if generate_with_graph:
-                return generate_with_graph(latents, prompt_embeds, pooled_prompt_embeds, text_ids, latent_image_ids, timestep)
-            else:
-                noise_pred = self.transformer(
-                    hidden_states=latents,
-                    timestep=timestep / 1000,
-                    guidance=guidance,
-                    pooled_projections=pooled_prompt_embeds,
-                    encoder_hidden_states=prompt_embeds,
-                    txt_ids=text_ids,
-                    img_ids=latent_image_ids,
-                    return_dict=False,
-                )[0]
-
-            # Yield intermediate result
-            latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
-            torch.cuda.empty_cache()
+            noise_pred = self.transformer(
+                hidden_states=latents,
+                timestep=timestep / 1000,
+                guidance=guidance,
+                pooled_projections=pooled_prompt_embeds,
+                encoder_hidden_states=prompt_embeds,
+                txt_ids=text_ids,
+                img_ids=latent_image_ids,
+                joint_attention_kwargs=self.joint_attention_kwargs,
+                return_dict=False,
+            )[0]
+
+            # Yield intermediate result
+            latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
+            torch.cuda.empty_cache()
 
         # Final image
         return self._decode_latents_to_image(latents, height, width, output_type)
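
Taken together, the change removes the experimental generate_with_graph callback and instead threads joint_attention_kwargs through the pipeline: it is stored on self, forwarded to every transformer call, and its "scale" entry is reused as the lora_scale passed to encode_prompt. A minimal usage sketch follows; the base checkpoint, the LoRA repo, and the 0.8 scale are illustrative assumptions, not part of the commit.

import torch
from custom_pipeline import FluxWithCFGPipeline

# Load the custom pipeline; the base checkpoint is an assumed example.
pipe = FluxWithCFGPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
).to("cuda")

# Hypothetical LoRA; FluxPipeline inherits load_lora_weights from diffusers.
pipe.load_lora_weights("some-user/some-flux-lora")

# joint_attention_kwargs is the parameter this commit adds: it is forwarded
# to the transformer on every denoising step, and its "scale" entry doubles
# as lora_scale for prompt encoding.
image = pipe(
    prompt="a watercolor fox in a snowy forest",
    joint_attention_kwargs={"scale": 0.8},
    max_sequence_length=300,
)

Note that the new Optional[Dict[str, Any]] annotation assumes Dict and Any are imported from typing elsewhere in custom_pipeline.py.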