openbmb
/

MiniCPM-Reranker

@@ -89,40 +89,20 @@ flash-attn>2.3.5
 #### Huggingface Transformers
 ```python
-from transformers import AutoModel, LlamaTokenizer, AutoModelForSequenceClassification
 import torch
 import numpy as np
-# from https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py
-class MiniCPMRerankerLLamaTokenizer(LlamaTokenizer):
-    def build_inputs_with_special_tokens(
-            self, token_ids_0, token_ids_1 = None
-        ):
-            """
-            - single sequence: `<s> X </s>`
-            - pair of sequences: `<s> A </s> B`
-            Args:
-                token_ids_0 (`List[int]`):
-                    List of IDs to which the special tokens will be added.
-                token_ids_1 (`List[int]`, *optional*):
-                    Optional second list of IDs for sequence pairs.
-            Returns:
-                `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
-            """
-            if token_ids_1 is None:
-                return super().build_inputs_with_special_tokens(token_ids_0)
-            bos = [self.bos_token_id]
-            sep = [self.eos_token_id]
-            return bos + token_ids_0 + sep + token_ids_1
 model_name = "openbmb/MiniCPM-Reranker"
-tokenizer = MiniCPMRerankerLLamaTokenizer.from_pretrained(model_name, trust_remote_code=True)
 tokenizer.padding_side = "right"
-model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True,attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
 model.eval()
 @torch.no_grad()
@@ -154,37 +134,14 @@ print(np.array(scores))  # [[[-4.7460938][-8.8515625]]]
 ```python
 from sentence_transformers import CrossEncoder
-from transformers import LlamaTokenizer
 import torch
-# from https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py
-class MiniCPMRerankerLLamaTokenizer(LlamaTokenizer):
-    def build_inputs_with_special_tokens(
-            self, token_ids_0, token_ids_1 = None
-        ):
-            """
-            - single sequence: `<s> X </s>`
-            - pair of sequences: `<s> A </s> B`
-            Args:
-                token_ids_0 (`List[int]`):
-                    List of IDs to which the special tokens will be added.
-                token_ids_1 (`List[int]`, *optional*):
-                    Optional second list of IDs for sequence pairs.
-            Returns:
-                `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
-            """
-            if token_ids_1 is None:
-                return super().build_inputs_with_special_tokens(token_ids_0)
-            bos = [self.bos_token_id]
-            sep = [self.eos_token_id]
-            return bos + token_ids_0 + sep + token_ids_1
 model_name = "openbmb/MiniCPM-Reranker"
-model = CrossEncoder(model_name,max_length=1024,trust_remote_code=True, automodel_args={"attn_implementation":"flash_attention_2","torch_dtype": torch.float16})
-model.tokenizer = MiniCPMRerankerLLamaTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model.tokenizer.padding_side = "right"
 query = "中国的首都是哪里？"

 #### Huggingface Transformers
 ```python
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 import numpy as np
 model_name = "openbmb/MiniCPM-Reranker"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 tokenizer.padding_side = "right"
+model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).to("cuda")
+# Optional: to use FlashAttention-2 instead (requires the flash-attn package), load the model as follows:
+# model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True,attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
 model.eval()
 @torch.no_grad()
 ```python
 from sentence_transformers import CrossEncoder
 import torch
+# Load the reranker as a sentence-transformers CrossEncoder in float16.
 model_name = "openbmb/MiniCPM-Reranker"
+model = CrossEncoder(model_name,max_length=1024,trust_remote_code=True, automodel_args={"torch_dtype": torch.float16})
+# Optional: to use FlashAttention-2 instead (requires the flash-attn package), create the CrossEncoder as follows:
+# model = CrossEncoder(model_name,max_length=1024,trust_remote_code=True, automodel_args={"attn_implementation":"flash_attention_2","torch_dtype": torch.float16})
 model.tokenizer.padding_side = "right"
 query = "中国的首都是哪里？"