Spaces:
Build error
Build error
File size: 4,723 Bytes
abcaca9 9c2785c abcaca9 9c2785c abcaca9 8d4dd5e abcaca9 ce42613 cd2a4c0 7c65c8c a29b26b 6b3f61e 7c65c8c 6b3f61e c6ee980 ce42613 8d4dd5e ce42613 9c2785c ce42613 abcaca9 ce42613 9c2785c ce42613 8d4dd5e ce42613 8d4dd5e ce42613 8d4dd5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import html

import nltk
import streamlit as st
import validators
from transformers import pipeline
from validators import ValidationFailure

from Summarizer import Summarizer
def main() -> None:
    """Render the Terms & Conditions Summarizer Streamlit page.

    Shows the introductory text, then a form asking for the number of
    sentences to extract and for either the Terms & Conditions text or a URL
    pointing to it. On submit, the input is summarised extractively, the
    extracted sentences are summarised abstractively, and both results are
    displayed.
    """
    # NOTE(review): presumably the 'punkt' resource is needed by Summarizer
    # for sentence splitting -- confirm against Summarizer.
    nltk.download('punkt')

    st.markdown('# Terms & Conditions Summarizer :pencil:')

    st.markdown('Do you also always take the time out of your day to thoroughly read every word of the Terms & Conditions before signing up to an app like the responsible citizen that you are? :thinking_face:<br>'
                'No?<br>'
                "Well don't worry, neither do we! That's why we created a <b>Terms & Conditions Summarization</b> algorithm!", unsafe_allow_html=True)

    st.markdown('Just copy-paste that pesky Terms & Conditions text or provide a URL to the text and let our fancy NLP algorithm do the rest!<br>'
                'You will see both an extractive summary (the most important sentences will be highlighted) and an abstractive summary (an actual summary)<br>'
                'Now you can just take a quick glance at the summary and go about the rest of your day assured that no one is abusing your precious personal data :books:', unsafe_allow_html=True)

    st.markdown('<b>Want to find out more?</b> :brain:<br>'
                'For details about the extractive part :point_right: https://en.wikipedia.org/wiki/Latent_semantic_analysis<br>'
                'For details about the abstractive part :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr', unsafe_allow_html=True)

    # NOTE(review): st.cache is deprecated in newer Streamlit releases in
    # favour of st.cache_resource -- confirm against the pinned version
    # before switching.
    @st.cache(allow_output_mutation=True,
              suppress_st_warning=True,
              show_spinner=False)
    def create_pipeline():
        """Build the summarization pipeline, showing a spinner while it loads."""
        with st.spinner('Please wait for the model to load...'):
            terms_and_conditions_pipeline = pipeline(
                task='summarization',
                model='ml6team/distilbart-tos-summarizer-tosdr',
                tokenizer='ml6team/distilbart-tos-summarizer-tosdr'
            )
        return terms_and_conditions_pipeline

    def display_abstractive_summary(summary) -> None:
        """Render the abstractive summary under its own sub-header."""
        st.subheader("Abstractive Summary")
        st.markdown('#####')
        st.markdown(summary)

    def display_extractive_summary(terms_and_conditions_sentences: list, summary_sentences: list) -> None:
        """Render the full text with the summary sentences highlighted.

        Args:
            terms_and_conditions_sentences: All sentences of the document, in
                order; they are joined with single spaces for display.
            summary_sentences: Sentences to highlight wherever they occur in
                the joined text.
        """
        st.subheader("Extractive Summary")
        st.markdown('#####')

        # The text is user supplied (or fetched from a user supplied URL) and
        # is rendered with unsafe_allow_html=True, so it must be escaped
        # before the highlight markup is added around the summary sentences.
        highlighted_text = html.escape(" ".join(terms_and_conditions_sentences), quote=False)
        for sentence in summary_sentences:
            escaped_sentence = html.escape(sentence, quote=False)
            if not escaped_sentence:
                # str.replace('', x) would insert x between every character.
                continue
            highlighted_text = highlighted_text.replace(
                escaped_sentence,
                f"<span style='background-color: #FFFF00'>{escaped_sentence}</span>")
        st.write(highlighted_text, unsafe_allow_html=True)

    def is_valid_url(url: str) -> bool:
        """Return True unless validators.url() reports a ValidationFailure."""
        return not isinstance(validators.url(url), ValidationFailure)

    summarizer: Summarizer = Summarizer(create_pipeline())

    if 'tc_text' not in st.session_state:
        st.session_state['tc_text'] = ''

    if 'sentences_length' not in st.session_state:
        st.session_state['sentences_length'] = Summarizer.DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH

    # Lay the options of radio widgets out horizontally.
    st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

    st.header("Input")

    with st.form(key='terms-and-conditions'):
        sentences_length_input = st.number_input(
            label='Number of sentences to be extracted:',
            min_value=1,
            value=st.session_state.sentences_length
        )
        tc_text_input = st.text_area(
            value=st.session_state.tc_text,
            label='Terms & conditions content or specify an URL:',
            height=240
        )
        submit_button = st.form_submit_button(label='Summarize')

    if not submit_button:
        return

    # Surrounding whitespace (e.g. a trailing newline from copy-pasting)
    # would otherwise make a pasted URL fail validation and be summarised as
    # if it were the document text.
    tc_input = tc_text_input.strip()
    if not tc_input:
        st.warning('Please provide the Terms & Conditions text or a URL to it first.')
        return

    if is_valid_url(tc_input):
        (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_url(
            tc_input, sentences_length_input)
    else:
        (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_text(
            tc_input, sentences_length_input)

    # The abstractive model summarises only the extracted sentences.
    extract_summary = " ".join(extract_summary_sentences)
    abstract_summary = summarizer.abstractive_summary(extract_summary)

    display_extractive_summary(all_sentences, extract_summary_sentences)
    display_abstractive_summary(abstract_summary)
# Entry point: build the page only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|