Spaces:
Runtime error
Runtime error
Commit
·
da80bd2
1
Parent(s):
e02be2a
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,10 @@ import torch
|
|
| 7 |
|
| 8 |
def load_tok_and_data(lan):
|
| 9 |
st_time = time()
|
| 10 |
-
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large",
|
|
|
|
|
|
|
|
|
|
| 11 |
dataset = load_dataset('Babelscape/SREDFM', lan, split="validation", streaming=True)
|
| 12 |
dataset = [example for example in dataset.take(1001)]
|
| 13 |
return (tokenizer, dataset)
|
|
@@ -59,7 +62,7 @@ def extract_triplets_typed(text):
|
|
| 59 |
triplets.append({'head': subject.strip(), 'head_type': subject_type, 'type': relation.strip(),'tail': object_.strip(), 'tail_type': object_type})
|
| 60 |
return triplets
|
| 61 |
|
| 62 |
-
st.markdown("""This is a demo for the
|
| 63 |
|
| 64 |
model = load_model()
|
| 65 |
|
|
@@ -105,7 +108,11 @@ st.write(text)
|
|
| 105 |
|
| 106 |
if not agree:
|
| 107 |
st.title('Silver output')
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
st.title('Prediction text')
|
| 111 |
decoded_preds = [text.replace('<s>', '').replace('</s>', '').replace('<pad>', '') for text in decoded_preds]
|
|
|
|
| 7 |
|
| 8 |
def load_tok_and_data(lan):
|
| 9 |
st_time = time()
|
| 10 |
+
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", tgt_lang="tp_XX")
|
| 11 |
+
tokenizer._src_lang = _Tokens[lan]
|
| 12 |
+
tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids(_Tokens[lan])
|
| 13 |
+
tokenizer.set_src_lang_special_tokens(_Tokens[lan])
|
| 14 |
dataset = load_dataset('Babelscape/SREDFM', lan, split="validation", streaming=True)
|
| 15 |
dataset = [example for example in dataset.take(1001)]
|
| 16 |
return (tokenizer, dataset)
|
|
|
|
| 62 |
triplets.append({'head': subject.strip(), 'head_type': subject_type, 'type': relation.strip(),'tail': object_.strip(), 'tail_type': object_type})
|
| 63 |
return triplets
|
| 64 |
|
| 65 |
+
st.markdown("""This is a demo for the ACL 2023 paper [RED<sup>FM</sup>: a Filtered and Multilingual Relation Extraction Dataset](https://arxiv.org/abs/2306.09802). The pre-trained model is able to extract triplets for up to 400 relation types from Wikidata or be used in downstream Relation Extraction task by fine-tuning. Find the model card [here](https://huggingface.co/Babelscape/mrebel-large). Read more about it in the [paper](https://arxiv.org/abs/2306.09802) and in the original [repository](https://github.com/Babelscape/rebel#REDFM).""")
|
| 66 |
|
| 67 |
model = load_model()
|
| 68 |
|
|
|
|
| 108 |
|
| 109 |
if not agree:
|
| 110 |
st.title('Silver output')
|
| 111 |
+
entities = dataset[dataset_example]['entities']
|
| 112 |
+
relations =[]
|
| 113 |
+
for trip in dataset[dataset_example]['relations']:
|
| 114 |
+
relations.append({'subject': entities[trip['subject']], 'predicate': trip['predicate'], 'object': entities[trip['object']]})
|
| 115 |
+
st.write(relations)
|
| 116 |
|
| 117 |
st.title('Prediction text')
|
| 118 |
decoded_preds = [text.replace('<s>', '').replace('</s>', '').replace('<pad>', '') for text in decoded_preds]
|