Fix: Respect `is_causal=False` config in forward to enable bidirectional attention (#37)
Fix: Respect `is_causal=False` config in forward to enable bidirectional attention (commit 98540699bd4a2b84b672e00e7302cbc314082bdc)
Co-authored-by: Yihang Wang <[email protected]>
- modeling_qwen.py +4 -4
modeling_qwen.py
CHANGED
|
@@ -350,7 +350,7 @@ class Qwen2FlashAttention2(Qwen2Attention):
|
|
| 350 |
past_key_value: Optional[Cache] = None,
|
| 351 |
output_attentions: bool = False,
|
| 352 |
use_cache: bool = False,
|
| 353 |
-
is_causal: bool =
|
| 354 |
**kwargs,
|
| 355 |
):
|
| 356 |
if "padding_mask" in kwargs:
|
|
@@ -646,7 +646,7 @@ class Qwen2SdpaAttention(Qwen2Attention):
|
|
| 646 |
past_key_value: Optional[Cache] = None,
|
| 647 |
output_attentions: bool = False,
|
| 648 |
use_cache: bool = False,
|
| 649 |
-
is_causal: bool =
|
| 650 |
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
| 651 |
if output_attentions:
|
| 652 |
# TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
|
|
@@ -965,7 +965,7 @@ class Qwen2Model(Qwen2PreTrainedModel):
|
|
| 965 |
output_hidden_states: Optional[bool] = None,
|
| 966 |
return_dict: Optional[bool] = None,
|
| 967 |
labels: Optional[torch.LongTensor] = None,
|
| 968 |
-
is_causal: Optional[bool] =
|
| 969 |
) -> Union[Tuple, BaseModelOutputWithPast]:
|
| 970 |
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
| 971 |
output_hidden_states = (
|
|
@@ -1160,7 +1160,7 @@ class Qwen2ForCausalLM(Qwen2PreTrainedModel):
|
|
| 1160 |
output_attentions: Optional[bool] = None,
|
| 1161 |
output_hidden_states: Optional[bool] = None,
|
| 1162 |
return_dict: Optional[bool] = None,
|
| 1163 |
-
is_causal: Optional[bool] =
|
| 1164 |
) -> Union[Tuple, CausalLMOutputWithPast]:
|
| 1165 |
r"""
|
| 1166 |
Args:
|
|
|
|
| 350 |
past_key_value: Optional[Cache] = None,
|
| 351 |
output_attentions: bool = False,
|
| 352 |
use_cache: bool = False,
|
| 353 |
+
is_causal: bool = False,
|
| 354 |
**kwargs,
|
| 355 |
):
|
| 356 |
if "padding_mask" in kwargs:
|
|
|
|
| 646 |
past_key_value: Optional[Cache] = None,
|
| 647 |
output_attentions: bool = False,
|
| 648 |
use_cache: bool = False,
|
| 649 |
+
is_causal: bool = True,
|
| 650 |
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
| 651 |
if output_attentions:
|
| 652 |
# TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
|
|
|
|
| 965 |
output_hidden_states: Optional[bool] = None,
|
| 966 |
return_dict: Optional[bool] = None,
|
| 967 |
labels: Optional[torch.LongTensor] = None,
|
| 968 |
+
is_causal: Optional[bool] = False,
|
| 969 |
) -> Union[Tuple, BaseModelOutputWithPast]:
|
| 970 |
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
| 971 |
output_hidden_states = (
|
|
|
|
| 1160 |
output_attentions: Optional[bool] = None,
|
| 1161 |
output_hidden_states: Optional[bool] = None,
|
| 1162 |
return_dict: Optional[bool] = None,
|
| 1163 |
+
is_causal: Optional[bool] = False,
|
| 1164 |
) -> Union[Tuple, CausalLMOutputWithPast]:
|
| 1165 |
r"""
|
| 1166 |
Args:
|