Spaces:
Build error
Build error
maybe now it works?
Browse files
app.py
CHANGED
@@ -1,12 +1,5 @@
|
|
1 |
-
from logging import disable
|
2 |
-
from pkg_resources import EggMetadata
|
3 |
import streamlit as st
|
4 |
import streamlit.components.v1 as components
|
5 |
-
import networkx as nx
|
6 |
-
import matplotlib.pyplot as plt
|
7 |
-
from pyvis.network import Network
|
8 |
-
from streamlit.state.session_state import SessionState
|
9 |
-
from streamlit.type_util import Key
|
10 |
import rebel
|
11 |
import wikipedia
|
12 |
from utils import clip_text
|
@@ -16,22 +9,23 @@ import os
|
|
16 |
MAX_TOPICS = 3
|
17 |
|
18 |
wiki_state_variables = {
|
19 |
-
'has_run_wiki':False,
|
20 |
'wiki_suggestions': [],
|
21 |
-
'wiki_text'
|
22 |
-
'nodes':[],
|
23 |
-
"topics":[],
|
24 |
-
"html_wiki":""
|
25 |
}
|
26 |
|
27 |
free_text_state_variables = {
|
28 |
-
'has_run_free':False,
|
29 |
-
"html_free":""
|
30 |
|
31 |
}
|
32 |
|
33 |
BUTTON_COLUMS = 4
|
34 |
|
|
|
35 |
def wiki_init_state_variables():
|
36 |
for k in free_text_state_variables.keys():
|
37 |
if k in st.session_state:
|
@@ -41,8 +35,10 @@ def wiki_init_state_variables():
|
|
41 |
if k not in st.session_state:
|
42 |
st.session_state[k] = v
|
43 |
|
|
|
44 |
def wiki_generate_graph():
|
45 |
-
st.session_state["GRAPH_FILENAME"] = str(
|
|
|
46 |
|
47 |
if 'wiki_text' not in st.session_state:
|
48 |
return
|
@@ -52,20 +48,23 @@ def wiki_generate_graph():
|
|
52 |
with st.spinner(text="Generating graph..."):
|
53 |
texts = st.session_state['wiki_text']
|
54 |
st.session_state['nodes'] = []
|
55 |
-
nodes = rebel.generate_knowledge_graph(
|
56 |
-
|
|
|
|
|
57 |
source_code = HtmlFile.read()
|
58 |
st.session_state["html_wiki"] = source_code
|
59 |
os.remove(st.session_state["GRAPH_FILENAME"])
|
60 |
for n in nodes:
|
61 |
n = n.lower()
|
62 |
if n not in st.session_state['topics']:
|
63 |
-
possible_topics = wikipedia.search(n, results
|
64 |
st.session_state['nodes'].extend(possible_topics)
|
65 |
st.session_state['nodes'] = list(set(st.session_state['nodes']))
|
66 |
st.session_state['has_run_wiki'] = True
|
67 |
st.success('Done!')
|
68 |
|
|
|
69 |
def wiki_show_suggestion():
|
70 |
st.session_state['wiki_suggestions'] = []
|
71 |
with st.spinner(text="fetching wiki topics..."):
|
@@ -74,7 +73,9 @@ def wiki_show_suggestion():
|
|
74 |
if (text is not None) and (text != ""):
|
75 |
subjects = text.split(",")[:MAX_TOPICS]
|
76 |
for subj in subjects:
|
77 |
-
st.session_state['wiki_suggestions'] += wikipedia.search(
|
|
|
|
|
78 |
|
79 |
def wiki_show_text(page_title):
|
80 |
with st.spinner(text="fetching wiki page..."):
|
@@ -92,6 +93,7 @@ def wiki_show_text(page_title):
|
|
92 |
except wikipedia.WikipediaException:
|
93 |
st.session_state['wiki_suggestions'].remove(page_title)
|
94 |
|
|
|
95 |
def wiki_add_text(term):
|
96 |
if len(st.session_state['wiki_text']) > MAX_TOPICS:
|
97 |
return
|
@@ -113,16 +115,20 @@ def wiki_add_text(term):
|
|
113 |
print(e)
|
114 |
st.session_state['nodes'].remove(term)
|
115 |
|
|
|
116 |
def wiki_reset_session():
|
117 |
for k in wiki_state_variables:
|
118 |
del st.session_state[k]
|
119 |
|
|
|
120 |
def free_reset_session():
|
121 |
for k in free_text_state_variables:
|
122 |
del st.session_state[k]
|
123 |
|
|
|
124 |
def free_text_generate():
|
125 |
-
st.session_state["GRAPH_FILENAME"] = str(
|
|
|
126 |
text = st.session_state['free_text'][0:100]
|
127 |
rebel.generate_knowledge_graph([text], st.session_state["GRAPH_FILENAME"])
|
128 |
HtmlFile = open(st.session_state["GRAPH_FILENAME"], 'r', encoding='utf-8')
|
@@ -131,9 +137,13 @@ def free_text_generate():
|
|
131 |
os.remove(st.session_state["GRAPH_FILENAME"])
|
132 |
st.session_state['has_run_free'] = True
|
133 |
|
|
|
134 |
def free_text_layout():
|
135 |
-
st.text_area("Free text", key="free_text", height=5,
|
136 |
-
|
|
|
|
|
|
|
137 |
|
138 |
def free_test_init_state_variables():
|
139 |
for k in wiki_state_variables.keys():
|
@@ -144,47 +154,51 @@ def free_test_init_state_variables():
|
|
144 |
if k not in st.session_state:
|
145 |
st.session_state[k] = v
|
146 |
|
|
|
147 |
st.title('RE:Belle')
|
148 |
st.markdown(
|
149 |
-
"""
|
150 |
### Building Beautiful Knowledge Graphs With REBEL
|
151 |
""")
|
152 |
st.selectbox(
|
153 |
-
|
154 |
-
|
155 |
|
156 |
|
157 |
def show_wiki_hub_page():
|
158 |
st.sidebar.button("Reset", on_click=wiki_reset_session, key="reset_key")
|
159 |
|
160 |
st.sidebar.markdown(
|
161 |
-
"""
|
162 |
## How To Create a Graph:
|
163 |
- Enter wikipedia search terms, separated by comma's
|
164 |
- Choose one or more of the suggested topics (max 3)
|
165 |
- Click generate!
|
166 |
"""
|
167 |
-
)
|
168 |
cols = st.columns([8, 1])
|
169 |
with cols[0]:
|
170 |
-
st.text_input("wikipedia search term", on_change=wiki_show_suggestion,
|
|
|
171 |
with cols[1]:
|
172 |
st.text('')
|
173 |
st.text('')
|
174 |
-
st.button("Search", on_click=wiki_show_suggestion,
|
|
|
175 |
|
176 |
if len(st.session_state['wiki_suggestions']) != 0:
|
177 |
num_buttons = len(st.session_state['wiki_suggestions'])
|
178 |
num_cols = num_buttons if 0 < num_buttons < BUTTON_COLUMS else BUTTON_COLUMS
|
179 |
-
columns = st.columns([1] * num_cols
|
180 |
for q in range(1 + num_buttons//num_cols):
|
181 |
for i, (c, s) in enumerate(zip(columns, st.session_state['wiki_suggestions'][q*num_cols: (q+1)*num_cols])):
|
182 |
with c:
|
183 |
-
st.button(s, on_click=wiki_show_text, args=(
|
|
|
184 |
|
185 |
if len(st.session_state['wiki_text']) != 0:
|
186 |
for i, t in enumerate(st.session_state['wiki_text']):
|
187 |
-
new_expander = st.expander(label=t[:30] + "...", expanded=(i==0))
|
188 |
with new_expander:
|
189 |
st.markdown(t)
|
190 |
|
@@ -209,23 +223,27 @@ def show_wiki_hub_page():
|
|
209 |
for q in range(1 + num_buttons//num_cols):
|
210 |
for i, (c, s) in enumerate(zip(columns, st.session_state["nodes"][q*num_cols: (q+1)*num_cols])):
|
211 |
with c:
|
212 |
-
st.button(s, on_click=wiki_add_text,
|
|
|
|
|
213 |
|
214 |
def show_free_text_hub_page():
|
215 |
-
st.sidebar.button("Reset", on_click=free_reset_session,
|
|
|
216 |
st.sidebar.markdown(
|
217 |
-
"""
|
218 |
## How To Create a Graph:
|
219 |
- Enter a text you'd like to see as a graph.
|
220 |
- Click generate!
|
221 |
"""
|
222 |
-
)
|
223 |
|
224 |
free_text_layout()
|
225 |
|
226 |
if st.session_state['has_run_free']:
|
227 |
components.html(st.session_state["html_free"], width=720, height=600)
|
228 |
|
|
|
229 |
if st.session_state['input_method'] == "wikipedia":
|
230 |
wiki_init_state_variables()
|
231 |
show_wiki_hub_page()
|
@@ -234,9 +252,8 @@ else:
|
|
234 |
show_free_text_hub_page()
|
235 |
|
236 |
|
237 |
-
|
238 |
st.sidebar.markdown(
|
239 |
-
"""
|
240 |
## What This Is And Why We Built it
|
241 |
|
242 |
This space shows how a transformer network can be used to convert *human* text into a computer-queryable format: a **knowledge graph**. Knowledge graphs are graphs where each node (or *vertex* if you're fancy) represent a concept/person/thing and each edge the link between those concepts. If you'd like to know more, you can read [this blogpost](https://www.ml6.eu/knowhow/knowledge-graphs-an-introduction-and-business-applications).
|
@@ -248,9 +265,9 @@ There is one problem though: building knowledge graphs from scratch is a time-co
|
|
248 |
)
|
249 |
|
250 |
st.sidebar.markdown(
|
251 |
-
"""
|
252 |
*Credits for the REBEL model go out to Pere-Lluís Huguet Cabot and Roberto Navigli.
|
253 |
The code can be found [here](https://github.com/Babelscape/rebel),
|
254 |
and the original paper [here](https://github.com/Babelscape/rebel/blob/main/docs/EMNLP_2021_REBEL__Camera_Ready_.pdf)*
|
255 |
"""
|
256 |
-
)
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import streamlit.components.v1 as components
|
|
|
|
|
|
|
|
|
|
|
3 |
import rebel
|
4 |
import wikipedia
|
5 |
from utils import clip_text
|
|
|
9 |
MAX_TOPICS = 3
|
10 |
|
11 |
wiki_state_variables = {
|
12 |
+
'has_run_wiki': False,
|
13 |
'wiki_suggestions': [],
|
14 |
+
'wiki_text': [],
|
15 |
+
'nodes': [],
|
16 |
+
"topics": [],
|
17 |
+
"html_wiki": ""
|
18 |
}
|
19 |
|
20 |
free_text_state_variables = {
|
21 |
+
'has_run_free': False,
|
22 |
+
"html_free": ""
|
23 |
|
24 |
}
|
25 |
|
26 |
BUTTON_COLUMS = 4
|
27 |
|
28 |
+
|
29 |
def wiki_init_state_variables():
|
30 |
for k in free_text_state_variables.keys():
|
31 |
if k in st.session_state:
|
|
|
35 |
if k not in st.session_state:
|
36 |
st.session_state[k] = v
|
37 |
|
38 |
+
|
39 |
def wiki_generate_graph():
|
40 |
+
st.session_state["GRAPH_FILENAME"] = str(
|
41 |
+
dt.now().timestamp()*1000) + ".html"
|
42 |
|
43 |
if 'wiki_text' not in st.session_state:
|
44 |
return
|
|
|
48 |
with st.spinner(text="Generating graph..."):
|
49 |
texts = st.session_state['wiki_text']
|
50 |
st.session_state['nodes'] = []
|
51 |
+
nodes = rebel.generate_knowledge_graph(
|
52 |
+
texts, st.session_state["GRAPH_FILENAME"])
|
53 |
+
HtmlFile = open(
|
54 |
+
st.session_state["GRAPH_FILENAME"], 'r', encoding='utf-8')
|
55 |
source_code = HtmlFile.read()
|
56 |
st.session_state["html_wiki"] = source_code
|
57 |
os.remove(st.session_state["GRAPH_FILENAME"])
|
58 |
for n in nodes:
|
59 |
n = n.lower()
|
60 |
if n not in st.session_state['topics']:
|
61 |
+
possible_topics = wikipedia.search(n, results=2)
|
62 |
st.session_state['nodes'].extend(possible_topics)
|
63 |
st.session_state['nodes'] = list(set(st.session_state['nodes']))
|
64 |
st.session_state['has_run_wiki'] = True
|
65 |
st.success('Done!')
|
66 |
|
67 |
+
|
68 |
def wiki_show_suggestion():
|
69 |
st.session_state['wiki_suggestions'] = []
|
70 |
with st.spinner(text="fetching wiki topics..."):
|
|
|
73 |
if (text is not None) and (text != ""):
|
74 |
subjects = text.split(",")[:MAX_TOPICS]
|
75 |
for subj in subjects:
|
76 |
+
st.session_state['wiki_suggestions'] += wikipedia.search(
|
77 |
+
subj, results=3)
|
78 |
+
|
79 |
|
80 |
def wiki_show_text(page_title):
|
81 |
with st.spinner(text="fetching wiki page..."):
|
|
|
93 |
except wikipedia.WikipediaException:
|
94 |
st.session_state['wiki_suggestions'].remove(page_title)
|
95 |
|
96 |
+
|
97 |
def wiki_add_text(term):
|
98 |
if len(st.session_state['wiki_text']) > MAX_TOPICS:
|
99 |
return
|
|
|
115 |
print(e)
|
116 |
st.session_state['nodes'].remove(term)
|
117 |
|
118 |
+
|
119 |
def wiki_reset_session():
|
120 |
for k in wiki_state_variables:
|
121 |
del st.session_state[k]
|
122 |
|
123 |
+
|
124 |
def free_reset_session():
|
125 |
for k in free_text_state_variables:
|
126 |
del st.session_state[k]
|
127 |
|
128 |
+
|
129 |
def free_text_generate():
|
130 |
+
st.session_state["GRAPH_FILENAME"] = str(
|
131 |
+
dt.now().timestamp()*1000) + ".html"
|
132 |
text = st.session_state['free_text'][0:100]
|
133 |
rebel.generate_knowledge_graph([text], st.session_state["GRAPH_FILENAME"])
|
134 |
HtmlFile = open(st.session_state["GRAPH_FILENAME"], 'r', encoding='utf-8')
|
|
|
137 |
os.remove(st.session_state["GRAPH_FILENAME"])
|
138 |
st.session_state['has_run_free'] = True
|
139 |
|
140 |
+
|
141 |
def free_text_layout():
|
142 |
+
st.text_area("Free text", key="free_text", height=5,
|
143 |
+
value="Tardigrades, known colloquially as water bears or moss piglets, are a phylum of eight-legged segmented micro-animals.")
|
144 |
+
st.button("Generate", on_click=free_text_generate,
|
145 |
+
key="free_text_generate")
|
146 |
+
|
147 |
|
148 |
def free_test_init_state_variables():
|
149 |
for k in wiki_state_variables.keys():
|
|
|
154 |
if k not in st.session_state:
|
155 |
st.session_state[k] = v
|
156 |
|
157 |
+
|
158 |
st.title('RE:Belle')
|
159 |
st.markdown(
|
160 |
+
"""
|
161 |
### Building Beautiful Knowledge Graphs With REBEL
|
162 |
""")
|
163 |
st.selectbox(
|
164 |
+
'input method',
|
165 |
+
('wikipedia', 'free text'), key="input_method")
|
166 |
|
167 |
|
168 |
def show_wiki_hub_page():
|
169 |
st.sidebar.button("Reset", on_click=wiki_reset_session, key="reset_key")
|
170 |
|
171 |
st.sidebar.markdown(
|
172 |
+
"""
|
173 |
## How To Create a Graph:
|
174 |
- Enter wikipedia search terms, separated by comma's
|
175 |
- Choose one or more of the suggested topics (max 3)
|
176 |
- Click generate!
|
177 |
"""
|
178 |
+
)
|
179 |
cols = st.columns([8, 1])
|
180 |
with cols[0]:
|
181 |
+
st.text_input("wikipedia search term", on_change=wiki_show_suggestion,
|
182 |
+
key="text", value="graphs, are, awesome")
|
183 |
with cols[1]:
|
184 |
st.text('')
|
185 |
st.text('')
|
186 |
+
st.button("Search", on_click=wiki_show_suggestion,
|
187 |
+
key="show_suggestion_key")
|
188 |
|
189 |
if len(st.session_state['wiki_suggestions']) != 0:
|
190 |
num_buttons = len(st.session_state['wiki_suggestions'])
|
191 |
num_cols = num_buttons if 0 < num_buttons < BUTTON_COLUMS else BUTTON_COLUMS
|
192 |
+
columns = st.columns([1] * num_cols)
|
193 |
for q in range(1 + num_buttons//num_cols):
|
194 |
for i, (c, s) in enumerate(zip(columns, st.session_state['wiki_suggestions'][q*num_cols: (q+1)*num_cols])):
|
195 |
with c:
|
196 |
+
st.button(s, on_click=wiki_show_text, args=(
|
197 |
+
s,), key=str(i)+s+"wiki_suggestion")
|
198 |
|
199 |
if len(st.session_state['wiki_text']) != 0:
|
200 |
for i, t in enumerate(st.session_state['wiki_text']):
|
201 |
+
new_expander = st.expander(label=t[:30] + "...", expanded=(i == 0))
|
202 |
with new_expander:
|
203 |
st.markdown(t)
|
204 |
|
|
|
223 |
for q in range(1 + num_buttons//num_cols):
|
224 |
for i, (c, s) in enumerate(zip(columns, st.session_state["nodes"][q*num_cols: (q+1)*num_cols])):
|
225 |
with c:
|
226 |
+
st.button(s, on_click=wiki_add_text,
|
227 |
+
args=(s,), key=str(i)+s)
|
228 |
+
|
229 |
|
230 |
def show_free_text_hub_page():
|
231 |
+
st.sidebar.button("Reset", on_click=free_reset_session,
|
232 |
+
key="free_reset_key")
|
233 |
st.sidebar.markdown(
|
234 |
+
"""
|
235 |
## How To Create a Graph:
|
236 |
- Enter a text you'd like to see as a graph.
|
237 |
- Click generate!
|
238 |
"""
|
239 |
+
)
|
240 |
|
241 |
free_text_layout()
|
242 |
|
243 |
if st.session_state['has_run_free']:
|
244 |
components.html(st.session_state["html_free"], width=720, height=600)
|
245 |
|
246 |
+
|
247 |
if st.session_state['input_method'] == "wikipedia":
|
248 |
wiki_init_state_variables()
|
249 |
show_wiki_hub_page()
|
|
|
252 |
show_free_text_hub_page()
|
253 |
|
254 |
|
|
|
255 |
st.sidebar.markdown(
|
256 |
+
"""
|
257 |
## What This Is And Why We Built it
|
258 |
|
259 |
This space shows how a transformer network can be used to convert *human* text into a computer-queryable format: a **knowledge graph**. Knowledge graphs are graphs where each node (or *vertex* if you're fancy) represent a concept/person/thing and each edge the link between those concepts. If you'd like to know more, you can read [this blogpost](https://www.ml6.eu/knowhow/knowledge-graphs-an-introduction-and-business-applications).
|
|
|
265 |
)
|
266 |
|
267 |
st.sidebar.markdown(
|
268 |
+
"""
|
269 |
*Credits for the REBEL model go out to Pere-Lluís Huguet Cabot and Roberto Navigli.
|
270 |
The code can be found [here](https://github.com/Babelscape/rebel),
|
271 |
and the original paper [here](https://github.com/Babelscape/rebel/blob/main/docs/EMNLP_2021_REBEL__Camera_Ready_.pdf)*
|
272 |
"""
|
273 |
+
)
|
rebel.py
CHANGED
@@ -3,7 +3,6 @@ from transformers import pipeline
|
|
3 |
from pyvis.network import Network
|
4 |
from functools import lru_cache
|
5 |
import spacy
|
6 |
-
from spacy import displacy
|
7 |
|
8 |
import streamlit as st
|
9 |
|
@@ -31,11 +30,11 @@ DEFAULT_LABEL_COLORS = {
|
|
31 |
|
32 |
@st.experimental_singleton(max_entries=1)
|
33 |
def get_pipeline():
|
34 |
-
triplet_extractor = pipeline(
|
|
|
35 |
return triplet_extractor
|
36 |
|
37 |
|
38 |
-
|
39 |
@st.experimental_singleton(max_entries=1)
|
40 |
def load_spacy():
|
41 |
nlp = spacy.load("en_core_web_sm")
|
@@ -47,13 +46,13 @@ def generate_knowledge_graph(texts: List[str], filename: str):
|
|
47 |
|
48 |
doc = nlp("\n".join(texts).lower())
|
49 |
NERs = [ent.text for ent in doc.ents]
|
50 |
-
NER_types =
|
51 |
|
52 |
triplets = []
|
53 |
for triplet in texts:
|
54 |
triplets.extend(generate_partial_graph(triplet))
|
55 |
-
heads = [
|
56 |
-
tails = [
|
57 |
|
58 |
nodes = list(set(heads + tails))
|
59 |
net = Network(directed=True, width="700px", height="700px")
|
@@ -73,7 +72,7 @@ def generate_knowledge_graph(texts: List[str], filename: str):
|
|
73 |
net.add_node(n, shape="circle")
|
74 |
|
75 |
unique_triplets = set()
|
76 |
-
stringify_trip
|
77 |
for triplet in triplets:
|
78 |
if stringify_trip(triplet) not in unique_triplets:
|
79 |
net.add_edge(triplet["head"].lower(), triplet["tail"].lower(),
|
@@ -95,7 +94,8 @@ def generate_knowledge_graph(texts: List[str], filename: str):
|
|
95 |
@lru_cache(maxsize=16)
|
96 |
def generate_partial_graph(text: str):
|
97 |
triplet_extractor = get_pipeline()
|
98 |
-
a = triplet_extractor(text, return_tensors=True, return_text=False)[
|
|
|
99 |
extracted_text = triplet_extractor.tokenizer.batch_decode(a)
|
100 |
extracted_triplets = extract_triplets(extracted_text[0])
|
101 |
return extracted_triplets
|
@@ -113,13 +113,15 @@ def extract_triplets(text):
|
|
113 |
if token == "<triplet>":
|
114 |
current = 't'
|
115 |
if relation != '':
|
116 |
-
triplets.append(
|
|
|
117 |
relation = ''
|
118 |
subject = ''
|
119 |
elif token == "<subj>":
|
120 |
current = 's'
|
121 |
if relation != '':
|
122 |
-
triplets.append(
|
|
|
123 |
object_ = ''
|
124 |
elif token == "<obj>":
|
125 |
current = 'o'
|
@@ -132,7 +134,7 @@ def extract_triplets(text):
|
|
132 |
elif current == 'o':
|
133 |
relation += ' ' + token
|
134 |
if subject != '' and relation != '' and object_ != '':
|
135 |
-
triplets.append(
|
|
|
136 |
|
137 |
return triplets
|
138 |
-
|
|
|
3 |
from pyvis.network import Network
|
4 |
from functools import lru_cache
|
5 |
import spacy
|
|
|
6 |
|
7 |
import streamlit as st
|
8 |
|
|
|
30 |
|
31 |
@st.experimental_singleton(max_entries=1)
|
32 |
def get_pipeline():
|
33 |
+
triplet_extractor = pipeline(
|
34 |
+
'text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
|
35 |
return triplet_extractor
|
36 |
|
37 |
|
|
|
38 |
@st.experimental_singleton(max_entries=1)
|
39 |
def load_spacy():
|
40 |
nlp = spacy.load("en_core_web_sm")
|
|
|
46 |
|
47 |
doc = nlp("\n".join(texts).lower())
|
48 |
NERs = [ent.text for ent in doc.ents]
|
49 |
+
NER_types = [ent.label_ for ent in doc.ents]
|
50 |
|
51 |
triplets = []
|
52 |
for triplet in texts:
|
53 |
triplets.extend(generate_partial_graph(triplet))
|
54 |
+
heads = [t["head"].lower() for t in triplets]
|
55 |
+
tails = [t["tail"].lower() for t in triplets]
|
56 |
|
57 |
nodes = list(set(heads + tails))
|
58 |
net = Network(directed=True, width="700px", height="700px")
|
|
|
72 |
net.add_node(n, shape="circle")
|
73 |
|
74 |
unique_triplets = set()
|
75 |
+
def stringify_trip(x): return x["tail"] + x["head"] + x["type"].lower()
|
76 |
for triplet in triplets:
|
77 |
if stringify_trip(triplet) not in unique_triplets:
|
78 |
net.add_edge(triplet["head"].lower(), triplet["tail"].lower(),
|
|
|
94 |
@lru_cache(maxsize=16)
|
95 |
def generate_partial_graph(text: str):
|
96 |
triplet_extractor = get_pipeline()
|
97 |
+
a = triplet_extractor(text, return_tensors=True, return_text=False)[
|
98 |
+
0]["generated_token_ids"]["output_ids"]
|
99 |
extracted_text = triplet_extractor.tokenizer.batch_decode(a)
|
100 |
extracted_triplets = extract_triplets(extracted_text[0])
|
101 |
return extracted_triplets
|
|
|
113 |
if token == "<triplet>":
|
114 |
current = 't'
|
115 |
if relation != '':
|
116 |
+
triplets.append(
|
117 |
+
{'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
|
118 |
relation = ''
|
119 |
subject = ''
|
120 |
elif token == "<subj>":
|
121 |
current = 's'
|
122 |
if relation != '':
|
123 |
+
triplets.append(
|
124 |
+
{'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
|
125 |
object_ = ''
|
126 |
elif token == "<obj>":
|
127 |
current = 'o'
|
|
|
134 |
elif current == 'o':
|
135 |
relation += ' ' + token
|
136 |
if subject != '' and relation != '' and object_ != '':
|
137 |
+
triplets.append(
|
138 |
+
{'head': subject.strip(), 'type': relation.strip(), 'tail': object_.strip()})
|
139 |
|
140 |
return triplets
|
|