dongxiaoqun
committed on
Commit
·
ba356d0
1
Parent(s):
52b50f8
Update README.md
Browse files
README.md
CHANGED
@@ -24,8 +24,7 @@ Task: Summarization
|
|
24 |
```python
|
25 |
from transformers import PegasusForConditionalGeneration,BertTokenizer
|
26 |
from typing import List, Optional
|
27 |
-
|
28 |
-
jieba.initialize()
|
29 |
# Need to download tokenizers_pegasus.py and other Python script from Fengshenbang-LM github repo in advance,
|
30 |
# or you can download tokenizers_pegasus.py and data_utils.py from https://huggingface.co/IDEA-CCNL/Randeng_Pegasus_523M_Summary/tree/main
|
31 |
# Strongly recommend you git clone the Fengshenbang-LM repo:
|
@@ -35,7 +34,7 @@ jieba.initialize()
|
|
35 |
# from tokenizers_pegasus import PegasusTokenizer
|
36 |
class PegasusTokenizer(BertTokenizer):
|
37 |
model_input_names = ["input_ids", "attention_mask"]
|
38 |
-
def __init__(self,
|
39 |
self.pre_tokenizer = pre_tokenizer
|
40 |
super().__init__(pre_tokenizer=self.pre_tokenizer, **kwargs)
|
41 |
self.add_special_tokens({'additional_special_tokens':["<mask_1>"]})
|
|
|
24 |
```python
|
25 |
from transformers import PegasusForConditionalGeneration,BertTokenizer
|
26 |
from typing import List, Optional
|
27 |
+
|
|
|
28 |
# Need to download tokenizers_pegasus.py and other Python script from Fengshenbang-LM github repo in advance,
|
29 |
# or you can download tokenizers_pegasus.py and data_utils.py from https://huggingface.co/IDEA-CCNL/Randeng_Pegasus_523M_Summary/tree/main
|
30 |
# Strongly recommend you git clone the Fengshenbang-LM repo:
|
|
|
34 |
# from tokenizers_pegasus import PegasusTokenizer
|
35 |
class PegasusTokenizer(BertTokenizer):
|
36 |
model_input_names = ["input_ids", "attention_mask"]
|
37 |
+
def __init__(self, **kwargs):
|
38 |
self.pre_tokenizer = pre_tokenizer
|
39 |
super().__init__(pre_tokenizer=self.pre_tokenizer, **kwargs)
|
40 |
self.add_special_tokens({'additional_special_tokens':["<mask_1>"]})
|