jbilcke-hf
/

LTX-Video-0.9.1-HFIE

Inference Endpoints

Model card Files Files and versions Community

jbilcke-hf HF staff committed on 6 days ago

Commit

de8d4f2

·

verified ·

1 Parent(s): 60a27e7

Update enhance.py

Files changed (1) hide show

enhance.py +9 -2

enhance.py CHANGED Viewed

@@ -76,6 +76,9 @@ class LTXEnhanceAttnProcessor2_0:
         attention_mask = None,
         **kwargs
     ) -> torch.Tensor:
         # The shape could be [batch_size, sequence_length, channels] or [batch_size, sequence_length, num_heads, head_dim]
         # We need to handle both cases
         if hidden_states.ndim == 4:
@@ -95,7 +98,11 @@ class LTXEnhanceAttnProcessor2_0:
         query = attn.to_q(hidden_states)
         key = attn.to_k(encoder_hidden_states)
         value = attn.to_v(encoder_hidden_states)
         if attn.upcast_attention:
             query = query.float()
             key = key.float()
@@ -125,7 +132,7 @@ class LTXEnhanceAttnProcessor2_0:
         )
         hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, inner_dim)
-        hidden_states = hidden_states.to(orig_dtype)  # Ensure we're back to original dtype
         if is_enhance_enabled() and enhance_scores is not None:
             hidden_states = hidden_states * enhance_scores

         attention_mask = None,
         **kwargs
     ) -> torch.Tensor:
+        # Store original dtype first
+        orig_dtype = hidden_states.dtype
         # The shape could be [batch_size, sequence_length, channels] or [batch_size, sequence_length, num_heads, head_dim]
         # We need to handle both cases
         if hidden_states.ndim == 4:
         query = attn.to_q(hidden_states)
         key = attn.to_k(encoder_hidden_states)
         value = attn.to_v(encoder_hidden_states)
+        query = query.view(batch_size, sequence_length, num_heads, head_dim).transpose(1, 2)
+        key = key.view(batch_size, -1, num_heads, head_dim).transpose(1, 2)
+        value = value.view(batch_size, -1, num_heads, head_dim).transpose(1, 2)
         if attn.upcast_attention:
             query = query.float()
             key = key.float()
         )
         hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, inner_dim)
+        hidden_states = hidden_states.to(orig_dtype)  # Now orig_dtype is defined
         if is_enhance_enabled() and enhance_scores is not None:
             hidden_states = hidden_states * enhance_scores