Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
```python
from transformers import RobertaTokenizerFast, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
...
context_l = create_windowed_context(raw_dataset, window_size)
raw_dataset_window = raw_dataset.map(partial(create_windowed_context_ds, context_l), batched=False, with_indices=True)
tokenized_data = raw_dataset_window.map(tokenize_function, batched=True)
```
## Hebrew Conclusion Extraction Model (based on sequence classification)

#### How to use

```python
from transformers import RobertaTokenizerFast, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
...
context_l = create_windowed_context(raw_dataset, window_size)
raw_dataset_window = raw_dataset.map(partial(create_windowed_context_ds, context_l), batched=False, with_indices=True)
tokenized_data = raw_dataset_window.map(tokenize_function, batched=True)
```
### Citing

If you use HeConE in your research, please cite [HeRo: RoBERTa and Longformer Hebrew Language Models](http://arxiv.org/abs/2304.11077).

```
@article{shalumov2023hero,
    title={HeRo: RoBERTa and Longformer Hebrew Language Models},
    author={Vitaly Shalumov and Harel Haskey},
    year={2023},
    journal={arXiv:2304.11077},
}
```