PereLluis13 committed on
Commit
569bb4f
·
1 Parent(s): b6980f9

add streaming

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -21,23 +21,25 @@ def load_models():
21
  _ = model.to("cuda:0") # comment if no GPU available
22
  _ = model.eval()
23
  print("+++++ loaded model", time() - st_time)
24
- dataset = load_dataset('Babelscape/rebel-dataset', split="validation")
25
- return (tokenizer, model, dataset)
26
 
27
  def extract_triplets(text):
28
  triplets = []
29
- relation = ''
 
 
30
  for token in text.split():
31
  if token == "<triplet>":
32
  current = 't'
33
  if relation != '':
34
- triplets.append((subject, relation, object_))
35
  relation = ''
36
  subject = ''
37
  elif token == "<subj>":
38
  current = 's'
39
  if relation != '':
40
- triplets.append((subject, relation, object_))
41
  object_ = ''
42
  elif token == "<obj>":
43
  current = 'o'
@@ -49,10 +51,10 @@ def extract_triplets(text):
49
  object_ += ' ' + token
50
  elif current == 'o':
51
  relation += ' ' + token
52
- triplets.append((subject, relation, object_))
 
53
  return triplets
54
 
55
-
56
  tokenizer, model, dataset = load_models()
57
 
58
  agree = st.checkbox('Free input', False)
 
21
  _ = model.to("cuda:0") # comment if no GPU available
22
  _ = model.eval()
23
  print("+++++ loaded model", time() - st_time)
24
+ dataset = load_dataset('Babelscape/rebel-dataset', split="validation", streaming=True)
25
+ return (tokenizer, model, dataset.take(1000))
26
 
27
  def extract_triplets(text):
28
  triplets = []
29
+ relation, subject, relation, object_ = '', '', '', ''
30
+ text = text.strip()
31
+ current = 'x'
32
  for token in text.split():
33
  if token == "<triplet>":
34
  current = 't'
35
  if relation != '':
36
+ triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
37
  relation = ''
38
  subject = ''
39
  elif token == "<subj>":
40
  current = 's'
41
  if relation != '':
42
+ triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
43
  object_ = ''
44
  elif token == "<obj>":
45
  current = 'o'
 
51
  object_ += ' ' + token
52
  elif current == 'o':
53
  relation += ' ' + token
54
+ if subject != '' and relation != '' and object_ != '':
55
+ triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
56
  return triplets
57
 
 
58
  tokenizer, model, dataset = load_models()
59
 
60
  agree = st.checkbox('Free input', False)