jbilcke-hf (HF staff) committed
Commit 60a27e7 · verified · 1 Parent(s): 573d0ba

Update enhance.py

Files changed (1)
  1. enhance.py +8 -7
enhance.py CHANGED
@@ -76,8 +76,13 @@ class LTXEnhanceAttnProcessor2_0:
         attention_mask = None,
         **kwargs
     ) -> torch.Tensor:
-        orig_dtype = hidden_states.dtype  # Store original dtype
-        batch_size, sequence_length, _ = hidden_states.shape
+        # The shape could be [batch_size, sequence_length, channels] or [batch_size, sequence_length, num_heads, head_dim]
+        # We need to handle both cases
+        if hidden_states.ndim == 4:
+            batch_size, sequence_length, num_heads, head_dim = hidden_states.shape
+        else:
+            batch_size, sequence_length, inner_dim = hidden_states.shape
+
         text_seq_length = encoder_hidden_states.shape[1] if encoder_hidden_states is not None else 0
 
         if encoder_hidden_states is None:
@@ -90,11 +95,7 @@ class LTXEnhanceAttnProcessor2_0:
         query = attn.to_q(hidden_states)
         key = attn.to_k(encoder_hidden_states)
         value = attn.to_v(encoder_hidden_states)
-
-        query = query.view(batch_size, sequence_length, num_heads, head_dim).transpose(1, 2)
-        key = key.view(batch_size, -1, num_heads, head_dim).transpose(1, 2)
-        value = value.view(batch_size, -1, num_heads, head_dim).transpose(1, 2)
-
+
         if attn.upcast_attention:
             query = query.float()
             key = key.float()
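For reference, here is the new branching logic from the first hunk in isolation — a minimal runnable sketch, with unpack_shape as a hypothetical helper name (the actual commit inlines this directly in __call__):

    import torch

    def unpack_shape(hidden_states: torch.Tensor):
        # 4D input: heads already split out -> [batch, seq, num_heads, head_dim]
        if hidden_states.ndim == 4:
            batch_size, sequence_length, num_heads, head_dim = hidden_states.shape
            inner_dim = num_heads * head_dim
        else:
            # 3D input: channels still fused -> [batch, seq, channels]
            batch_size, sequence_length, inner_dim = hidden_states.shape
        return batch_size, sequence_length, inner_dim

    # Both layouts describe the same underlying sizes:
    assert unpack_shape(torch.randn(2, 128, 512)) == (2, 128, 512)
    assert unpack_shape(torch.randn(2, 128, 8, 64)) == (2, 128, 512)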
 
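The lines deleted in the second hunk were the standard multi-head split. As a standalone illustration of what view(...).transpose(1, 2) did there (illustrative sizes, not the patched code):

    import torch

    batch_size, sequence_length, num_heads, head_dim = 2, 128, 8, 64
    query = torch.randn(batch_size, sequence_length, num_heads * head_dim)

    # Split the fused channel dim into heads, then swap the seq and head axes:
    # [batch, seq, heads * head_dim] -> [batch, heads, seq, head_dim]
    query = query.view(batch_size, sequence_length, num_heads, head_dim).transpose(1, 2)
    assert query.shape == (2, 8, 128, 64)

After this commit the query/key/value projections are left in their fused [batch, seq, inner_dim] layout at this point; the diff alone does not show where the head split now happens.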