Added tpu flash attention.
xora/models/transformers/attention.py
CHANGED

@@ -20,6 +20,13 @@ from diffusers.utils.torch_utils import maybe_allow_in_graph
 from einops import rearrange
 from torch import nn
 
+try:
+    from torch_xla.experimental.custom_kernel import flash_attention
+except ImportError:
+    # workaround for automatic tests. Currently this function is manually patched
+    # to the torch_xla lib on setup of container
+    pass
+
 # code adapted from https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention.py
 
 logger = logging.get_logger(__name__)

@@ -162,6 +169,15 @@ class BasicTransformerBlock(nn.Module):
         self._chunk_size = None
         self._chunk_dim = 0
 
+    def set_use_tpu_flash_attention(self, device):
+        r"""
+        Function sets the flag in this object and propagates down the children. The flag will enforce the usage of TPU
+        attention kernel.
+        """
+        if device == "xla":
+            self.use_tpu_flash_attention = True
+            self.attn1.set_use_tpu_flash_attention(device)
+            self.attn2.set_use_tpu_flash_attention(device)
 
     def set_chunk_feed_forward(self, chunk_size: Optional[int], dim: int = 0):
         # Sets chunk feed-forward

@@ -461,6 +477,13 @@ class Attention(nn.Module):
             processor = AttnProcessor2_0()
         self.set_processor(processor)
 
+    def set_use_tpu_flash_attention(self, device_type):
+        r"""
+        Function sets the flag in this object. The flag will enforce the usage of TPU attention kernel.
+        """
+        if device_type == "xla":
+            self.use_tpu_flash_attention = True
+
     def set_processor(self, processor: "AttnProcessor") -> None:
         r"""
         Set the attention processor to use.
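Note: this diff only wires up the `use_tpu_flash_attention` flag; the attention processors that consume it are not shown. A minimal sketch of how a processor could branch on the flag, using the `flash_attention` kernel imported above and falling back to PyTorch SDPA elsewhere (the helper below is illustrative, not the repository's actual processor code):

```python
import torch
import torch.nn.functional as F


def attend(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, use_tpu_flash_attention: bool) -> torch.Tensor:
    # q, k, v: [batch, heads, seq_len, head_dim], the same layout SDPA expects.
    if use_tpu_flash_attention:
        # TPU path: the Pallas flash-attention kernel from torch_xla,
        # i.e. the symbol guarded by the try/except import in this diff.
        from torch_xla.experimental.custom_kernel import flash_attention

        return flash_attention(q, k, v)
    # CPU/GPU fallback: PyTorch's fused scaled-dot-product attention.
    return F.scaled_dot_product_attention(q, k, v)
```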
xora/models/transformers/transformer3d.py
CHANGED

@@ -153,11 +153,11 @@ class Transformer3DModel(ModelMixin, ConfigMixin):
         """
         logger.info(" ENABLE TPU FLASH ATTENTION -> TRUE")
         # if using TPU -> configure components to use TPU flash attention
-        if
+        if self.device.type == "xla":
             self.use_tpu_flash_attention = True
             # push config down to the attention modules
             for block in self.transformer_blocks:
-                block.set_use_tpu_flash_attention()
+                block.set_use_tpu_flash_attention(self.device.type)
 
     def initialize(self, embedding_std: float, mode: Literal["xora", "pixart"]):
         def _basic_init(module):
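A usage sketch tying the flag propagation together (the top-level method name on `Transformer3DModel` and the checkpoint path are assumptions; only the method body appears in this hunk):

```python
import torch_xla.core.xla_model as xm
from xora.models.transformers.transformer3d import Transformer3DModel

model = Transformer3DModel.from_pretrained("<checkpoint-dir>")  # hypothetical checkpoint location
model = model.to(xm.xla_device())      # after this, model.device.type == "xla"
model.set_use_tpu_flash_attention()    # assumed name; pushes "xla" down to every block's attn1/attn2
```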
xora/utils/dist_util.py
CHANGED

@@ -1,11 +1,5 @@
 from enum import Enum
 
-class AccelerationType(Enum):
-    CPU = "cpu"
-    GPU = "gpu"
-    TPU = "tpu"
-    MPS = "mps"
-
 def execute_graph() -> None:
     if _acceleration_type == AccelerationType.TPU:
         xm.mark_step()
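For context, `xm.mark_step()` is the torch_xla call that cuts the lazily traced graph and executes it on the TPU, which is what `execute_graph()` wraps for the TPU case. A sketch of where such a call typically sits (the training loop below is illustrative, not from this repository):

```python
import torch_xla.core.xla_model as xm


def train_step(model, batch, optimizer):
    loss = model(**batch).loss   # assumes a HF-style model output with a .loss field
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    # Flush the accumulated XLA graph for this step to the device.
    xm.mark_step()
```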