Spaces:
Running
Running
Commit
·
569bb4f
1
Parent(s):
b6980f9
add streaming
Browse files
app.py
CHANGED
@@ -21,23 +21,25 @@ def load_models():
|
|
21 |
_ = model.to("cuda:0") # comment if no GPU available
|
22 |
_ = model.eval()
|
23 |
print("+++++ loaded model", time() - st_time)
|
24 |
-
dataset = load_dataset('Babelscape/rebel-dataset', split="validation")
|
25 |
-
return (tokenizer, model, dataset)
|
26 |
|
27 |
def extract_triplets(text):
|
28 |
triplets = []
|
29 |
-
relation = ''
|
|
|
|
|
30 |
for token in text.split():
|
31 |
if token == "<triplet>":
|
32 |
current = 't'
|
33 |
if relation != '':
|
34 |
-
triplets.append((
|
35 |
relation = ''
|
36 |
subject = ''
|
37 |
elif token == "<subj>":
|
38 |
current = 's'
|
39 |
if relation != '':
|
40 |
-
triplets.append((
|
41 |
object_ = ''
|
42 |
elif token == "<obj>":
|
43 |
current = 'o'
|
@@ -49,10 +51,10 @@ def extract_triplets(text):
|
|
49 |
object_ += ' ' + token
|
50 |
elif current == 'o':
|
51 |
relation += ' ' + token
|
52 |
-
|
|
|
53 |
return triplets
|
54 |
|
55 |
-
|
56 |
tokenizer, model, dataset = load_models()
|
57 |
|
58 |
agree = st.checkbox('Free input', False)
|
|
|
21 |
_ = model.to("cuda:0") # comment if no GPU available
|
22 |
_ = model.eval()
|
23 |
print("+++++ loaded model", time() - st_time)
|
24 |
+
dataset = load_dataset('Babelscape/rebel-dataset', split="validation", streaming=True)
|
25 |
+
return (tokenizer, model, dataset.take(1000))
|
26 |
|
27 |
def extract_triplets(text):
|
28 |
triplets = []
|
29 |
+
relation, subject, relation, object_ = '', '', '', ''
|
30 |
+
text = text.strip()
|
31 |
+
current = 'x'
|
32 |
for token in text.split():
|
33 |
if token == "<triplet>":
|
34 |
current = 't'
|
35 |
if relation != '':
|
36 |
+
triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
|
37 |
relation = ''
|
38 |
subject = ''
|
39 |
elif token == "<subj>":
|
40 |
current = 's'
|
41 |
if relation != '':
|
42 |
+
triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
|
43 |
object_ = ''
|
44 |
elif token == "<obj>":
|
45 |
current = 'o'
|
|
|
51 |
object_ += ' ' + token
|
52 |
elif current == 'o':
|
53 |
relation += ' ' + token
|
54 |
+
if subject != '' and relation != '' and object_ != '':
|
55 |
+
triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
|
56 |
return triplets
|
57 |
|
|
|
58 |
tokenizer, model, dataset = load_models()
|
59 |
|
60 |
agree = st.checkbox('Free input', False)
|