Commit
·
c1b87e2
1
Parent(s):
4c939c0
Update README.md
Browse files
README.md
CHANGED
@@ -17,15 +17,27 @@ via Contrastive Fine-Tuning of mMiniLMv2 without References
|
|
17 |
## How to use
|
18 |
|
19 |
```python
|
|
|
20 |
from transformers import AutoTokenizer, AutoModel
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
tokenizer = AutoTokenizer.from_pretrained("aixplain/NoRef-ER")
|
23 |
model = AutoModel.from_pretrained("aixplain/NoRef-ER")
|
24 |
|
25 |
-
|
|
|
26 |
"In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced.",
|
27 |
"In Italy, pizzas serves in formal settings, such as at an restaurant, is presented unslicing."
|
28 |
-
]
|
|
|
|
|
|
|
|
|
29 |
scores = model.score(**tokens)
|
30 |
```
|
31 |
|
|
|
17 |
## How to use
|
18 |
|
19 |
```python
|
20 |
+
import re
|
21 |
from transformers import AutoTokenizer, AutoModel
|
22 |
|
23 |
+
def preprocess(text: str):
|
24 |
+
text = text.lower()
|
25 |
+
text = re.sub(r'[\(\[].*?[\)\]]', '', text)
|
26 |
+
text = re.sub(r'[^\w\s]', '', text)
|
27 |
+
return text
|
28 |
+
|
29 |
tokenizer = AutoTokenizer.from_pretrained("aixplain/NoRef-ER")
|
30 |
model = AutoModel.from_pretrained("aixplain/NoRef-ER")
|
31 |
|
32 |
+
# preprocess: lowercase, drop bracketed spans, strip punctuation
|
33 |
+
texts = [
|
34 |
"In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced.",
|
35 |
"In Italy, pizzas serves in formal settings, such as at an restaurant, is presented unslicing."
|
36 |
+
]
|
37 |
+
preprocessed_texts = [preprocess(text) for text in texts]
|
38 |
+
# tokenize the preprocessed texts into a padded batch of PyTorch tensors
|
39 |
+
tokens = tokenizer(preprocessed_texts, padding=True, return_tensors="pt")
|
40 |
+
# score
|
41 |
scores = model.score(**tokens)
|
42 |
```
|
43 |
|