Commit: Update modeling_quiet.py
File changed: modeling_quiet.py (+13 −1)
@@ -1072,7 +1072,16 @@ class QuietModel(QuietPreTrainedModel):
 1072          )
 1073
 1074          if attention_mask is None:
 1075  -           [removed line — content not shown in this view]
 1076
 1077          if attention_mask.dim() == 2:
 1078              attention_mask = attention_mask.view(batch_size, 1, 1, seq_len)
@@ -1880,6 +1889,9 @@ class QuietForCausalLM(QuietPreTrainedModel):
 1880              attention_mask = attention_mask.expand(batch_size, 1, seq_len, seq_len)
 1881          elif attention_mask.dim() != 4:
 1882              raise ValueError(f"Attention mask should be of shape (batch_size, 1, seq_len, seq_len), but got {attention_mask.shape}")
 1883          past_key_values = outputs.past_key_values
 1884          position_ids = position_ids + 1
 1885
|
 1072          )
 1073
 1074          if attention_mask is None:
 1075  +           if input_ids is not None:
 1076  +               attention_mask = torch.ones_like(input_ids, dtype=torch.bool)
 1077  +           elif inputs_embeds is not None:
 1078  +               attention_mask = torch.ones(
 1079  +                   (batch_size, seq_len),
 1080  +                   dtype=torch.bool,
 1081  +                   device=inputs_embeds.device
 1082  +               )
 1083  +           else:
 1084  +               raise ValueError("Either input_ids or inputs_embeds should be provided.")
 1085
 1086          if attention_mask.dim() == 2:
 1087              attention_mask = attention_mask.view(batch_size, 1, 1, seq_len)
|
|
|
 1889              attention_mask = attention_mask.expand(batch_size, 1, seq_len, seq_len)
 1890          elif attention_mask.dim() != 4:
 1891              raise ValueError(f"Attention mask should be of shape (batch_size, 1, seq_len, seq_len), but got {attention_mask.shape}")
 1892  +
 1893  +       attention_mask = attention_mask.to(dtype=torch.bool)
 1894  +
 1895          past_key_values = outputs.past_key_values
 1896          position_ids = position_ids + 1
 1897