fix example code
The example code provided has some small errors. This is a revision that I tested in Colab, with the output commented at the bottom for users to verify.
README.md CHANGED

@@ -19,18 +19,19 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/fineweb-edu-classifier")
 model = AutoModelForSequenceClassification.from_pretrained("HuggingFaceTB/fineweb-edu-classifier")
 
-
-inputs = tokenizer(
+text = "This is a test sentence."
+inputs = tokenizer(text, return_tensors="pt", padding="longest", truncation=True)
 outputs = model(**inputs)
-logits = outputs.logits.squeeze(-1).float().numpy()
+logits = outputs.logits.squeeze(-1).float().detach().numpy()
 score = logits.item()
 result = {
     "text": text,
     "score": score,
-    "int_score": int(round(max(0, min(score, 5))))
+    "int_score": int(round(max(0, min(score, 5)))),
 }
 
 print(result)
+# {'text': 'This is a test sentence.', 'score': 0.07964489609003067, 'int_score': 0}
 ```
 
 ## Training
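For context on the main fix: the original snippet called `.numpy()` on a logits tensor that still tracked gradients, which PyTorch rejects at runtime; adding `.detach()`, as this revision does, is one remedy. A minimal alternative sketch, not part of this commit, runs the forward pass under `torch.no_grad()` so no autograd graph is built in the first place (checkpoint name and example text copied from the README snippet above):

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Same checkpoint as in the README snippet above.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/fineweb-edu-classifier")
model = AutoModelForSequenceClassification.from_pretrained("HuggingFaceTB/fineweb-edu-classifier")

text = "This is a test sentence."
inputs = tokenizer(text, return_tensors="pt", padding="longest", truncation=True)

# Inference under no_grad(): the output tensors do not require grad,
# so .numpy() works without an explicit .detach().
with torch.no_grad():
    outputs = model(**inputs)

score = outputs.logits.squeeze(-1).float().numpy().item()
int_score = int(round(max(0, min(score, 5))))  # clamp the raw score to an integer in [0, 5]
print({"text": text, "score": score, "int_score": int_score})
```

Either approach should give the same result as the revised README code; `torch.no_grad()` also skips gradient bookkeeping entirely, which is slightly cheaper for pure inference.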