Update app.py
app.py CHANGED
@@ -260,6 +260,7 @@ def highlight_entities(article_content,summary_output):
     for entity in unmatched_entities:
         summary_output = summary_output.replace(entity, markdown_start_red + entity + markdown_end)
     soup = BeautifulSoup(summary_output, features="html.parser")
+    st.write(soup,unsafe_allow_html=True)
     return HTML_WRAPPER.format(soup)
 
 
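Note: the only change in this hunk is the new st.write call, which renders the highlighted summary inline in the Streamlit page in addition to returning the wrapped HTML. A minimal sketch of the wrap-and-render pattern, assuming markdown_start_red / markdown_end are colored <span> markers defined elsewhere in app.py (the values below are placeholders, not the app's real ones):

from bs4 import BeautifulSoup

markdown_start_red = '<span style="background-color: #f8d7da">'  # placeholder value
markdown_end = '</span>'                                         # placeholder value

summary_output = "Paris is the capital of Germany."
unmatched_entities = ["Germany"]  # entities that do not appear in the source article
for entity in unmatched_entities:
    summary_output = summary_output.replace(entity, markdown_start_red + entity + markdown_end)

soup = BeautifulSoup(summary_output, features="html.parser")
print(soup)  # in the app: st.write(soup, unsafe_allow_html=True)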
@@ -274,26 +275,6 @@ def render_svg(svg_file):
     html = r'<img src="data:image/svg+xml;base64,%s"/>' % b64
     return html
 
-
-def generate_abstractive_summary(text, type, min_len=120, max_len=512, **kwargs):
-    text = text.strip().replace("\n", " ")
-    if type == "top_p":
-        text = summarization_model(text, min_length=min_len,
-                                   max_length=max_len,
-                                   top_k=50, top_p=0.95, clean_up_tokenization_spaces=True, truncation=True, **kwargs)
-    elif type == "greedy":
-        text = summarization_model(text, min_length=min_len,
-                                   max_length=max_len, clean_up_tokenization_spaces=True, truncation=True, **kwargs)
-    elif type == "top_k":
-        text = summarization_model(text, min_length=min_len, max_length=max_len, top_k=50,
-                                   clean_up_tokenization_spaces=True, truncation=True, **kwargs)
-    elif type == "beam":
-        text = summarization_model(text, min_length=min_len,
-                                   max_length=max_len,
-                                   clean_up_tokenization_spaces=True, truncation=True, **kwargs)
-    summary = text[0]['summary_text'].replace("<n>", " ")
-    return summary
-
 def clean_text(text,doc=False,plain_text=False,url=False):
     """Return clean text from the various input sources"""
 
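Note: the deleted generate_abstractive_summary only varied the decoding strategy of a single pipeline call, so the branches collapse into a lookup table. A condensed sketch of the same idea, assuming summarization_model is a transformers summarization pipeline (the model name below is a stand-in, not necessarily what app.py used). One caveat the sketch corrects: the removed code passed top_k/top_p without do_sample=True, under which the pipeline ignores them, and its "beam" branch was identical to greedy because it never set num_beams.

from transformers import pipeline

# Stand-in model; app.py builds its own summarization_model elsewhere.
summarization_model = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

DECODING = {
    "top_p":  dict(do_sample=True, top_k=50, top_p=0.95),
    "top_k":  dict(do_sample=True, top_k=50),
    "greedy": {},
    "beam":   {},  # true beam search would also need num_beams > 1
}

def generate_abstractive_summary(text, strategy="beam", min_len=120, max_len=512, **kwargs):
    text = text.strip().replace("\n", " ")
    out = summarization_model(text, min_length=min_len, max_length=max_len,
                              truncation=True, clean_up_tokenization_spaces=True,
                              **DECODING[strategy], **kwargs)
    return out[0]["summary_text"].replace("<n>", " ")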
@@ -319,7 +300,6 @@ def clean_text(text,doc=False,plain_text=False,url=False):
     return None, clean_text
 
 
-
 @st.experimental_singleton(suppress_st_warning=True)
 def get_spacy():
     nlp = en_core_web_lg.load()
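Note on the context lines: st.experimental_singleton caches one shared object across reruns and sessions, which is why the spaCy model load sits behind it; without it, en_core_web_lg would be re-parsed from disk on every widget interaction. (Later Streamlit releases renamed this decorator to st.cache_resource.) A minimal sketch of the pattern:

import streamlit as st
import en_core_web_lg

@st.experimental_singleton(suppress_st_warning=True)
def get_spacy():
    # Loaded once per process; every rerun reuses the same nlp object.
    return en_core_web_lg.load()

nlp = get_spacy()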
@@ -486,7 +466,7 @@ if summarize:
 
     with st.spinner("Calculating and matching entities, this takes a few seconds..."):
 
-        entity_match_html = highlight_entities(' '.join(
+        entity_match_html = highlight_entities(' '.join(text_to_summarize),summarized_text)
         st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the Original Text")
         st.markdown("####")
 
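Note: this last hunk completes a call that appears in the previous revision as an expression cut off at an open parenthesis, so the fix is what makes the entity-matching step run at all. The repaired call passes both the joined source chunks and the generated summary, roughly:

# text_to_summarize: list of source-text chunks; summarized_text: model output.
article = ' '.join(text_to_summarize)
entity_match_html = highlight_entities(article, summarized_text)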