dongxiaoqun committed on
Commit
ba356d0
·
1 Parent(s): 52b50f8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -3
README.md CHANGED
@@ -24,8 +24,7 @@ Task: Summarization
24
  ```python
25
  from transformers import PegasusForConditionalGeneration,BertTokenizer
26
  from typing import List, Optional
27
- import jieba
28
- jieba.initialize()
29
  # Download tokenizers_pegasus.py and the other required Python scripts from the Fengshenbang-LM GitHub repo in advance,
30
  # or download tokenizers_pegasus.py and data_utils.py from https://huggingface.co/IDEA-CCNL/Randeng_Pegasus_523M_Summary/tree/main
31
  # We strongly recommend that you git clone the Fengshenbang-LM repo:
@@ -35,7 +34,7 @@ jieba.initialize()
35
  # from tokenizers_pegasus import PegasusTokenizer
36
  class PegasusTokenizer(BertTokenizer):
37
  model_input_names = ["input_ids", "attention_mask"]
38
- def __init__(self, pre_tokenizer=lambda x: jieba.cut(x, HMM=False), **kwargs):
39
  self.pre_tokenizer = pre_tokenizer
40
  super().__init__(pre_tokenizer=self.pre_tokenizer, **kwargs)
41
  self.add_special_tokens({'additional_special_tokens':["<mask_1>"]})
 
24
  ```python
25
  from transformers import PegasusForConditionalGeneration,BertTokenizer
26
  from typing import List, Optional
27
+
 
28
  # Download tokenizers_pegasus.py and the other required Python scripts from the Fengshenbang-LM GitHub repo in advance,
29
  # or download tokenizers_pegasus.py and data_utils.py from https://huggingface.co/IDEA-CCNL/Randeng_Pegasus_523M_Summary/tree/main
30
  # We strongly recommend that you git clone the Fengshenbang-LM repo:
 
34
  # from tokenizers_pegasus import PegasusTokenizer
35
  class PegasusTokenizer(BertTokenizer):
36
  model_input_names = ["input_ids", "attention_mask"]
37
+ def __init__(self, **kwargs):
38
  self.pre_tokenizer = pre_tokenizer
39
  super().__init__(pre_tokenizer=self.pre_tokenizer, **kwargs)
40
  self.add_special_tokens({'additional_special_tokens':["<mask_1>"]})