khaerens committed on
Commit
804a21e
·
1 Parent(s): 2443852

maybe now it works?

Browse files
Files changed (2) hide show
  1. app.py +57 -40
  2. rebel.py +14 -12
app.py CHANGED
@@ -1,12 +1,5 @@
1
- from logging import disable
2
- from pkg_resources import EggMetadata
3
  import streamlit as st
4
  import streamlit.components.v1 as components
5
- import networkx as nx
6
- import matplotlib.pyplot as plt
7
- from pyvis.network import Network
8
- from streamlit.state.session_state import SessionState
9
- from streamlit.type_util import Key
10
  import rebel
11
  import wikipedia
12
  from utils import clip_text
@@ -16,22 +9,23 @@ import os
16
  MAX_TOPICS = 3
17
 
18
  wiki_state_variables = {
19
- 'has_run_wiki':False,
20
  'wiki_suggestions': [],
21
- 'wiki_text' : [],
22
- 'nodes':[],
23
- "topics":[],
24
- "html_wiki":""
25
  }
26
 
27
  free_text_state_variables = {
28
- 'has_run_free':False,
29
- "html_free":""
30
 
31
  }
32
 
33
  BUTTON_COLUMS = 4
34
 
 
35
  def wiki_init_state_variables():
36
  for k in free_text_state_variables.keys():
37
  if k in st.session_state:
@@ -41,8 +35,10 @@ def wiki_init_state_variables():
41
  if k not in st.session_state:
42
  st.session_state[k] = v
43
 
 
44
  def wiki_generate_graph():
45
- st.session_state["GRAPH_FILENAME"] = str(dt.now().timestamp()*1000) + ".html"
 
46
 
47
  if 'wiki_text' not in st.session_state:
48
  return
@@ -52,20 +48,23 @@ def wiki_generate_graph():
52
  with st.spinner(text="Generating graph..."):
53
  texts = st.session_state['wiki_text']
54
  st.session_state['nodes'] = []
55
- nodes = rebel.generate_knowledge_graph(texts, st.session_state["GRAPH_FILENAME"])
56
- HtmlFile = open(st.session_state["GRAPH_FILENAME"], 'r', encoding='utf-8')
 
 
57
  source_code = HtmlFile.read()
58
  st.session_state["html_wiki"] = source_code
59
  os.remove(st.session_state["GRAPH_FILENAME"])
60
  for n in nodes:
61
  n = n.lower()
62
  if n not in st.session_state['topics']:
63
- possible_topics = wikipedia.search(n, results = 2)
64
  st.session_state['nodes'].extend(possible_topics)
65
  st.session_state['nodes'] = list(set(st.session_state['nodes']))
66
  st.session_state['has_run_wiki'] = True
67
  st.success('Done!')
68
 
 
69
  def wiki_show_suggestion():
70
  st.session_state['wiki_suggestions'] = []
71
  with st.spinner(text="fetching wiki topics..."):
@@ -74,7 +73,9 @@ def wiki_show_suggestion():
74
  if (text is not None) and (text != ""):
75
  subjects = text.split(",")[:MAX_TOPICS]
76
  for subj in subjects:
77
- st.session_state['wiki_suggestions'] += wikipedia.search(subj, results = 3)
 
 
78
 
79
  def wiki_show_text(page_title):
80
  with st.spinner(text="fetching wiki page..."):
@@ -92,6 +93,7 @@ def wiki_show_text(page_title):
92
  except wikipedia.WikipediaException:
93
  st.session_state['wiki_suggestions'].remove(page_title)
94
 
 
95
  def wiki_add_text(term):
96
  if len(st.session_state['wiki_text']) > MAX_TOPICS:
97
  return
@@ -113,16 +115,20 @@ def wiki_add_text(term):
113
  print(e)
114
  st.session_state['nodes'].remove(term)
115
 
 
116
  def wiki_reset_session():
117
  for k in wiki_state_variables:
118
  del st.session_state[k]
119
 
 
120
  def free_reset_session():
121
  for k in free_text_state_variables:
122
  del st.session_state[k]
123
 
 
124
  def free_text_generate():
125
- st.session_state["GRAPH_FILENAME"] = str(dt.now().timestamp()*1000) + ".html"
 
126
  text = st.session_state['free_text'][0:100]
127
  rebel.generate_knowledge_graph([text], st.session_state["GRAPH_FILENAME"])
128
  HtmlFile = open(st.session_state["GRAPH_FILENAME"], 'r', encoding='utf-8')
@@ -131,9 +137,13 @@ def free_text_generate():
131
  os.remove(st.session_state["GRAPH_FILENAME"])
132
  st.session_state['has_run_free'] = True
133
 
 
134
  def free_text_layout():
135
- st.text_area("Free text", key="free_text", height=5, value="Tardigrades, known colloquially as water bears or moss piglets, are a phylum of eight-legged segmented micro-animals.")
136
- st.button("Generate", on_click=free_text_generate, key="free_text_generate")
 
 
 
137
 
138
  def free_test_init_state_variables():
139
  for k in wiki_state_variables.keys():
@@ -144,47 +154,51 @@ def free_test_init_state_variables():
144
  if k not in st.session_state:
145
  st.session_state[k] = v
146
 
 
147
  st.title('RE:Belle')
148
  st.markdown(
149
- """
150
  ### Building Beautiful Knowledge Graphs With REBEL
151
  """)
152
  st.selectbox(
153
- 'input method',
154
- ('wikipedia', 'free text'), key="input_method")
155
 
156
 
157
  def show_wiki_hub_page():
158
  st.sidebar.button("Reset", on_click=wiki_reset_session, key="reset_key")
159
 
160
  st.sidebar.markdown(
161
- """
162
  ## How To Create a Graph:
163
  - Enter wikipedia search terms, separated by comma's
164
  - Choose one or more of the suggested topics (max 3)
165
  - Click generate!
166
  """
167
- )
168
  cols = st.columns([8, 1])
169
  with cols[0]:
170
- st.text_input("wikipedia search term", on_change=wiki_show_suggestion, key="text", value="graphs, are, awesome")
 
171
  with cols[1]:
172
  st.text('')
173
  st.text('')
174
- st.button("Search", on_click=wiki_show_suggestion, key="show_suggestion_key")
 
175
 
176
  if len(st.session_state['wiki_suggestions']) != 0:
177
  num_buttons = len(st.session_state['wiki_suggestions'])
178
  num_cols = num_buttons if 0 < num_buttons < BUTTON_COLUMS else BUTTON_COLUMS
179
- columns = st.columns([1] * num_cols )
180
  for q in range(1 + num_buttons//num_cols):
181
  for i, (c, s) in enumerate(zip(columns, st.session_state['wiki_suggestions'][q*num_cols: (q+1)*num_cols])):
182
  with c:
183
- st.button(s, on_click=wiki_show_text, args=(s,), key=str(i)+s+"wiki_suggestion")
 
184
 
185
  if len(st.session_state['wiki_text']) != 0:
186
  for i, t in enumerate(st.session_state['wiki_text']):
187
- new_expander = st.expander(label=t[:30] + "...", expanded=(i==0))
188
  with new_expander:
189
  st.markdown(t)
190
 
@@ -209,23 +223,27 @@ def show_wiki_hub_page():
209
  for q in range(1 + num_buttons//num_cols):
210
  for i, (c, s) in enumerate(zip(columns, st.session_state["nodes"][q*num_cols: (q+1)*num_cols])):
211
  with c:
212
- st.button(s, on_click=wiki_add_text, args=(s,), key=str(i)+s)
 
 
213
 
214
  def show_free_text_hub_page():
215
- st.sidebar.button("Reset", on_click=free_reset_session, key="free_reset_key")
 
216
  st.sidebar.markdown(
217
- """
218
  ## How To Create a Graph:
219
  - Enter a text you'd like to see as a graph.
220
  - Click generate!
221
  """
222
- )
223
 
224
  free_text_layout()
225
 
226
  if st.session_state['has_run_free']:
227
  components.html(st.session_state["html_free"], width=720, height=600)
228
 
 
229
  if st.session_state['input_method'] == "wikipedia":
230
  wiki_init_state_variables()
231
  show_wiki_hub_page()
@@ -234,9 +252,8 @@ else:
234
  show_free_text_hub_page()
235
 
236
 
237
-
238
  st.sidebar.markdown(
239
- """
240
  ## What This Is And Why We Built it
241
 
242
  This space shows how a transformer network can be used to convert *human* text into a computer-queryable format: a **knowledge graph**. Knowledge graphs are graphs where each node (or *vertex* if you're fancy) represent a concept/person/thing and each edge the link between those concepts. If you'd like to know more, you can read [this blogpost](https://www.ml6.eu/knowhow/knowledge-graphs-an-introduction-and-business-applications).
@@ -248,9 +265,9 @@ There is one problem though: building knowledge graphs from scratch is a time-co
248
  )
249
 
250
  st.sidebar.markdown(
251
- """
252
  *Credits for the REBEL model go out to Pere-Lluís Huguet Cabot and Roberto Navigli.
253
  The code can be found [here](https://github.com/Babelscape/rebel),
254
  and the original paper [here](https://github.com/Babelscape/rebel/blob/main/docs/EMNLP_2021_REBEL__Camera_Ready_.pdf)*
255
  """
256
- )
 
 
 
1
  import streamlit as st
2
  import streamlit.components.v1 as components
 
 
 
 
 
3
  import rebel
4
  import wikipedia
5
  from utils import clip_text
 
9
  MAX_TOPICS = 3
10
 
11
  wiki_state_variables = {
12
+ 'has_run_wiki': False,
13
  'wiki_suggestions': [],
14
+ 'wiki_text': [],
15
+ 'nodes': [],
16
+ "topics": [],
17
+ "html_wiki": ""
18
  }
19
 
20
  free_text_state_variables = {
21
+ 'has_run_free': False,
22
+ "html_free": ""
23
 
24
  }
25
 
26
  BUTTON_COLUMS = 4
27
 
28
+
29
  def wiki_init_state_variables():
30
  for k in free_text_state_variables.keys():
31
  if k in st.session_state:
 
35
  if k not in st.session_state:
36
  st.session_state[k] = v
37
 
38
+
39
  def wiki_generate_graph():
40
+ st.session_state["GRAPH_FILENAME"] = str(
41
+ dt.now().timestamp()*1000) + ".html"
42
 
43
  if 'wiki_text' not in st.session_state:
44
  return
 
48
  with st.spinner(text="Generating graph..."):
49
  texts = st.session_state['wiki_text']
50
  st.session_state['nodes'] = []
51
+ nodes = rebel.generate_knowledge_graph(
52
+ texts, st.session_state["GRAPH_FILENAME"])
53
+ HtmlFile = open(
54
+ st.session_state["GRAPH_FILENAME"], 'r', encoding='utf-8')
55
  source_code = HtmlFile.read()
56
  st.session_state["html_wiki"] = source_code
57
  os.remove(st.session_state["GRAPH_FILENAME"])
58
  for n in nodes:
59
  n = n.lower()
60
  if n not in st.session_state['topics']:
61
+ possible_topics = wikipedia.search(n, results=2)
62
  st.session_state['nodes'].extend(possible_topics)
63
  st.session_state['nodes'] = list(set(st.session_state['nodes']))
64
  st.session_state['has_run_wiki'] = True
65
  st.success('Done!')
66
 
67
+
68
  def wiki_show_suggestion():
69
  st.session_state['wiki_suggestions'] = []
70
  with st.spinner(text="fetching wiki topics..."):
 
73
  if (text is not None) and (text != ""):
74
  subjects = text.split(",")[:MAX_TOPICS]
75
  for subj in subjects:
76
+ st.session_state['wiki_suggestions'] += wikipedia.search(
77
+ subj, results=3)
78
+
79
 
80
  def wiki_show_text(page_title):
81
  with st.spinner(text="fetching wiki page..."):
 
93
  except wikipedia.WikipediaException:
94
  st.session_state['wiki_suggestions'].remove(page_title)
95
 
96
+
97
  def wiki_add_text(term):
98
  if len(st.session_state['wiki_text']) > MAX_TOPICS:
99
  return
 
115
  print(e)
116
  st.session_state['nodes'].remove(term)
117
 
118
+
119
  def wiki_reset_session():
120
  for k in wiki_state_variables:
121
  del st.session_state[k]
122
 
123
+
124
  def free_reset_session():
125
  for k in free_text_state_variables:
126
  del st.session_state[k]
127
 
128
+
129
  def free_text_generate():
130
+ st.session_state["GRAPH_FILENAME"] = str(
131
+ dt.now().timestamp()*1000) + ".html"
132
  text = st.session_state['free_text'][0:100]
133
  rebel.generate_knowledge_graph([text], st.session_state["GRAPH_FILENAME"])
134
  HtmlFile = open(st.session_state["GRAPH_FILENAME"], 'r', encoding='utf-8')
 
137
  os.remove(st.session_state["GRAPH_FILENAME"])
138
  st.session_state['has_run_free'] = True
139
 
140
+
141
  def free_text_layout():
142
+ st.text_area("Free text", key="free_text", height=5,
143
+ value="Tardigrades, known colloquially as water bears or moss piglets, are a phylum of eight-legged segmented micro-animals.")
144
+ st.button("Generate", on_click=free_text_generate,
145
+ key="free_text_generate")
146
+
147
 
148
  def free_test_init_state_variables():
149
  for k in wiki_state_variables.keys():
 
154
  if k not in st.session_state:
155
  st.session_state[k] = v
156
 
157
+
158
  st.title('RE:Belle')
159
  st.markdown(
160
+ """
161
  ### Building Beautiful Knowledge Graphs With REBEL
162
  """)
163
  st.selectbox(
164
+ 'input method',
165
+ ('wikipedia', 'free text'), key="input_method")
166
 
167
 
168
  def show_wiki_hub_page():
169
  st.sidebar.button("Reset", on_click=wiki_reset_session, key="reset_key")
170
 
171
  st.sidebar.markdown(
172
+ """
173
  ## How To Create a Graph:
174
  - Enter wikipedia search terms, separated by comma's
175
  - Choose one or more of the suggested topics (max 3)
176
  - Click generate!
177
  """
178
+ )
179
  cols = st.columns([8, 1])
180
  with cols[0]:
181
+ st.text_input("wikipedia search term", on_change=wiki_show_suggestion,
182
+ key="text", value="graphs, are, awesome")
183
  with cols[1]:
184
  st.text('')
185
  st.text('')
186
+ st.button("Search", on_click=wiki_show_suggestion,
187
+ key="show_suggestion_key")
188
 
189
  if len(st.session_state['wiki_suggestions']) != 0:
190
  num_buttons = len(st.session_state['wiki_suggestions'])
191
  num_cols = num_buttons if 0 < num_buttons < BUTTON_COLUMS else BUTTON_COLUMS
192
+ columns = st.columns([1] * num_cols)
193
  for q in range(1 + num_buttons//num_cols):
194
  for i, (c, s) in enumerate(zip(columns, st.session_state['wiki_suggestions'][q*num_cols: (q+1)*num_cols])):
195
  with c:
196
+ st.button(s, on_click=wiki_show_text, args=(
197
+ s,), key=str(i)+s+"wiki_suggestion")
198
 
199
  if len(st.session_state['wiki_text']) != 0:
200
  for i, t in enumerate(st.session_state['wiki_text']):
201
+ new_expander = st.expander(label=t[:30] + "...", expanded=(i == 0))
202
  with new_expander:
203
  st.markdown(t)
204
 
 
223
  for q in range(1 + num_buttons//num_cols):
224
  for i, (c, s) in enumerate(zip(columns, st.session_state["nodes"][q*num_cols: (q+1)*num_cols])):
225
  with c:
226
+ st.button(s, on_click=wiki_add_text,
227
+ args=(s,), key=str(i)+s)
228
+
229
 
230
  def show_free_text_hub_page():
231
+ st.sidebar.button("Reset", on_click=free_reset_session,
232
+ key="free_reset_key")
233
  st.sidebar.markdown(
234
+ """
235
  ## How To Create a Graph:
236
  - Enter a text you'd like to see as a graph.
237
  - Click generate!
238
  """
239
+ )
240
 
241
  free_text_layout()
242
 
243
  if st.session_state['has_run_free']:
244
  components.html(st.session_state["html_free"], width=720, height=600)
245
 
246
+
247
  if st.session_state['input_method'] == "wikipedia":
248
  wiki_init_state_variables()
249
  show_wiki_hub_page()
 
252
  show_free_text_hub_page()
253
 
254
 
 
255
  st.sidebar.markdown(
256
+ """
257
  ## What This Is And Why We Built it
258
 
259
  This space shows how a transformer network can be used to convert *human* text into a computer-queryable format: a **knowledge graph**. Knowledge graphs are graphs where each node (or *vertex* if you're fancy) represent a concept/person/thing and each edge the link between those concepts. If you'd like to know more, you can read [this blogpost](https://www.ml6.eu/knowhow/knowledge-graphs-an-introduction-and-business-applications).
 
265
  )
266
 
267
  st.sidebar.markdown(
268
+ """
269
  *Credits for the REBEL model go out to Pere-Lluís Huguet Cabot and Roberto Navigli.
270
  The code can be found [here](https://github.com/Babelscape/rebel),
271
  and the original paper [here](https://github.com/Babelscape/rebel/blob/main/docs/EMNLP_2021_REBEL__Camera_Ready_.pdf)*
272
  """
273
+ )
rebel.py CHANGED
@@ -3,7 +3,6 @@ from transformers import pipeline
3
  from pyvis.network import Network
4
  from functools import lru_cache
5
  import spacy
6
- from spacy import displacy
7
 
8
  import streamlit as st
9
 
@@ -31,11 +30,11 @@ DEFAULT_LABEL_COLORS = {
31
 
32
  @st.experimental_singleton(max_entries=1)
33
  def get_pipeline():
34
- triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
 
35
  return triplet_extractor
36
 
37
 
38
-
39
  @st.experimental_singleton(max_entries=1)
40
  def load_spacy():
41
  nlp = spacy.load("en_core_web_sm")
@@ -47,13 +46,13 @@ def generate_knowledge_graph(texts: List[str], filename: str):
47
 
48
  doc = nlp("\n".join(texts).lower())
49
  NERs = [ent.text for ent in doc.ents]
50
- NER_types = [ent.label_ for ent in doc.ents]
51
 
52
  triplets = []
53
  for triplet in texts:
54
  triplets.extend(generate_partial_graph(triplet))
55
- heads = [ t["head"].lower() for t in triplets]
56
- tails = [ t["tail"].lower() for t in triplets]
57
 
58
  nodes = list(set(heads + tails))
59
  net = Network(directed=True, width="700px", height="700px")
@@ -73,7 +72,7 @@ def generate_knowledge_graph(texts: List[str], filename: str):
73
  net.add_node(n, shape="circle")
74
 
75
  unique_triplets = set()
76
- stringify_trip = lambda x : x["tail"] + x["head"] + x["type"].lower()
77
  for triplet in triplets:
78
  if stringify_trip(triplet) not in unique_triplets:
79
  net.add_edge(triplet["head"].lower(), triplet["tail"].lower(),
@@ -95,7 +94,8 @@ def generate_knowledge_graph(texts: List[str], filename: str):
95
  @lru_cache(maxsize=16)
96
  def generate_partial_graph(text: str):
97
  triplet_extractor = get_pipeline()
98
- a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
 
99
  extracted_text = triplet_extractor.tokenizer.batch_decode(a)
100
  extracted_triplets = extract_triplets(extracted_text[0])
101
  return extracted_triplets
@@ -113,13 +113,15 @@ def extract_triplets(text):
113
  if token == "<triplet>":
114
  current = 't'
115
  if relation != '':
116
- triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
 
117
  relation = ''
118
  subject = ''
119
  elif token == "<subj>":
120
  current = 's'
121
  if relation != '':
122
- triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
 
123
  object_ = ''
124
  elif token == "<obj>":
125
  current = 'o'
@@ -132,7 +134,7 @@ def extract_triplets(text):
132
  elif current == 'o':
133
  relation += ' ' + token
134
  if subject != '' and relation != '' and object_ != '':
135
- triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
 
136
 
137
  return triplets
138
-
 
3
  from pyvis.network import Network
4
  from functools import lru_cache
5
  import spacy
 
6
 
7
  import streamlit as st
8
 
 
30
 
31
  @st.experimental_singleton(max_entries=1)
32
  def get_pipeline():
33
+ triplet_extractor = pipeline(
34
+ 'text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
35
  return triplet_extractor
36
 
37
 
 
38
  @st.experimental_singleton(max_entries=1)
39
  def load_spacy():
40
  nlp = spacy.load("en_core_web_sm")
 
46
 
47
  doc = nlp("\n".join(texts).lower())
48
  NERs = [ent.text for ent in doc.ents]
49
+ NER_types = [ent.label_ for ent in doc.ents]
50
 
51
  triplets = []
52
  for triplet in texts:
53
  triplets.extend(generate_partial_graph(triplet))
54
+ heads = [t["head"].lower() for t in triplets]
55
+ tails = [t["tail"].lower() for t in triplets]
56
 
57
  nodes = list(set(heads + tails))
58
  net = Network(directed=True, width="700px", height="700px")
 
72
  net.add_node(n, shape="circle")
73
 
74
  unique_triplets = set()
75
+ def stringify_trip(x): return x["tail"] + x["head"] + x["type"].lower()
76
  for triplet in triplets:
77
  if stringify_trip(triplet) not in unique_triplets:
78
  net.add_edge(triplet["head"].lower(), triplet["tail"].lower(),
 
94
  @lru_cache(maxsize=16)
95
  def generate_partial_graph(text: str):
96
  triplet_extractor = get_pipeline()
97
+ a = triplet_extractor(text, return_tensors=True, return_text=False)[
98
+ 0]["generated_token_ids"]["output_ids"]
99
  extracted_text = triplet_extractor.tokenizer.batch_decode(a)
100
  extracted_triplets = extract_triplets(extracted_text[0])
101
  return extracted_triplets
 
113
  if token == "<triplet>":
114
  current = 't'
115
  if relation != '':
116
+ triplets.append(
117
+ {'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
118
  relation = ''
119
  subject = ''
120
  elif token == "<subj>":
121
  current = 's'
122
  if relation != '':
123
+ triplets.append(
124
+ {'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
125
  object_ = ''
126
  elif token == "<obj>":
127
  current = 'o'
 
134
  elif current == 'o':
135
  relation += ' ' + token
136
  if subject != '' and relation != '' and object_ != '':
137
+ triplets.append(
138
+ {'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
139
 
140
  return triplets