File size: 3,695 Bytes
abcaca9
9c2785c
abcaca9
 
 
9c2785c
abcaca9
8d4dd5e
 
 
 
 
90f2ef6
8d4dd5e
 
90f2ef6
9c2785c
 
 
8d4dd5e
abcaca9
8d4dd5e
9c2785c
 
 
 
 
abcaca9
9c2785c
 
90f2ef6
9c2785c
 
 
 
 
 
abcaca9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c2785c
8d4dd5e
abcaca9
8d4dd5e
 
 
9c2785c
abcaca9
90f2ef6
abcaca9
8d4dd5e
 
 
 
 
 
 
 
abcaca9
90f2ef6
 
9c2785c
abcaca9
8d4dd5e
 
 
abcaca9
 
 
 
 
 
8d4dd5e
abcaca9
 
8d4dd5e
abcaca9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import html
import re

import nltk
import streamlit as st
import validators
from transformers import pipeline
from validators import ValidationFailure

from Summarizer import Summarizer

# Download the NLTK 'punkt' resource when the script starts.
# NOTE(review): presumably needed for sentence tokenization inside Summarizer — confirm.
nltk.download('punkt')

# Value used to seed the "number of sentences to be extracted" session entry.
DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH = 10

# Static page texts, kept together so the rendering calls below stay short.
_PAGE_TITLE = '# Terms & conditions abstractive summarization model :pencil:'
_APP_DESCRIPTION = (
    'This app provides the abstract summary of the provided terms & conditions. '
    'The abstractive summarization is preceded by LSA (Latent Semantic Analysis) extractive summarization'
)
_MODEL_INFO = 'Information about the model :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr'
# The leading and trailing empty entries reproduce the newlines that surround the usage list.
_USAGE_LINES = (
    '',
    'To use this:',
    '- Number of sentences to be extracted is configurable',
    "- Specify an URL to extract contents OR copy terms & conditions content and hit 'Summarize'",
    '',
)

st.markdown(_PAGE_TITLE)
st.write(_APP_DESCRIPTION)
st.write(_MODEL_INFO)
st.markdown('\n'.join(_USAGE_LINES))


@st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=False)
def create_pipeline():
    """Build the summarization pipeline for the terms & conditions model.

    The 'ml6team/distilbart-tos-summarizer-tosdr' checkpoint is used for both
    the model and the tokenizer. A spinner with a "please wait" message is
    shown while the pipeline is being constructed. The function is wrapped in
    ``st.cache`` with Streamlit's own spinner disabled in favour of that one.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        'summarization' task.
    """
    checkpoint = 'ml6team/distilbart-tos-summarizer-tosdr'
    with st.spinner('Please wait for the model to load...'):
        return pipeline(task='summarization', model=checkpoint, tokenizer=checkpoint)


def display_abstractive_summary(summary) -> None:
    """Render the "Abstractive Summary" section of the page.

    Writes the subheader, then an empty '#####' markdown heading
    (apparently used as vertical spacing), then ``summary`` as markdown.

    Args:
        summary: The abstractive summary to display; passed to ``st.markdown``.
    """
    st.subheader("Abstractive Summary")
    for markdown_chunk in ('#####', summary):
        st.markdown(markdown_chunk)


def display_extractive_summary(terms_and_conditions_sentences: list, summary_sentences: list) -> None:
    """Render the full text with the extracted summary sentences highlighted.

    The document sentences are joined with single spaces and every occurrence
    of a summary sentence is wrapped in a yellow ``<span>``. The result is
    written with ``unsafe_allow_html=True``, so the text is HTML-escaped first:
    markup contained in the pasted or fetched content is then displayed
    literally instead of being interpreted by the browser.

    Args:
        terms_and_conditions_sentences: Sentences (strings) of the whole document.
        summary_sentences: Sentences (strings) chosen by the extractive summary.
    """
    st.subheader("Extractive Summary")
    st.markdown('#####')

    # Escape BEFORE adding our own markup. quote=False is enough because the
    # text only ever lands in element content, never inside an attribute.
    escaped_text = html.escape(" ".join(terms_and_conditions_sentences), quote=False)

    # Escape the summary sentences the same way so they still match the text.
    # Empty sentences are dropped (an empty needle matches between every
    # character) and duplicates are removed (they would nest spans).
    targets = list(dict.fromkeys(
        html.escape(sentence, quote=False) for sentence in summary_sentences if sentence
    ))

    if targets:
        # One single pass over the text: a sentence can never match inside the
        # markup inserted for another one. Longest alternatives come first so a
        # sentence containing a shorter one is highlighted as a whole.
        pattern = re.compile(
            "|".join(re.escape(target) for target in sorted(targets, key=len, reverse=True))
        )
        escaped_text = pattern.sub(
            lambda match: f"<span style='background-color: #FFFF00'>{match.group(0)}</span>",
            escaped_text,
        )

    st.write(escaped_text, unsafe_allow_html=True)


def is_valid_url(url: str) -> bool:
    """Tell whether ``url`` is accepted by ``validators.url``.

    Args:
        url: The candidate string to check.

    Returns:
        False when the validator yields a ``ValidationFailure`` object,
        True for any other result.
    """
    return not isinstance(validators.url(url), ValidationFailure)


# Summarizer wrapping the (cached) model pipeline.
summarizer: Summarizer = Summarizer(create_pipeline())

# Seed the session entries that provide the initial values of the form widgets.
if 'tc_text' not in st.session_state:
    st.session_state['tc_text'] = ''

if 'sentences_length' not in st.session_state:
    st.session_state['sentences_length'] = DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH

# CSS that lays radio options out in a row.
# NOTE(review): no radio widget is visible in this file — possibly a leftover; confirm before removing.
st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)
st.header("Input")

with st.form(key='terms-and-conditions'):
    sentences_length_input = st.number_input(
        label='Number of sentences to be extracted:',
        min_value=1,
        value=st.session_state.sentences_length
    )
    tc_text_input = st.text_area(
        value=st.session_state.tc_text,
        label='Terms & conditions content or specify an URL:',
        height=240
    )

    submit_button = st.form_submit_button(label='Summarize')

if submit_button:
    # Surrounding whitespace (e.g. a trailing newline from a paste) would make
    # an otherwise valid URL fail validation and be summarized as plain text.
    tc_input = tc_text_input.strip()

    if not tc_input:
        # Nothing to summarize: tell the user instead of running the model on an empty string.
        st.warning('Please provide terms & conditions content or an URL before summarizing.')
    else:
        # A valid URL is fetched and summarized; anything else is treated as the content itself.
        if is_valid_url(tc_input):
            all_sentences, extract_summary_sentences = summarizer.extractive_summary_from_url(
                tc_input, sentences_length_input)
        else:
            all_sentences, extract_summary_sentences = summarizer.extractive_summary_from_text(
                tc_input, sentences_length_input)

        # The abstractive model summarizes the extractive summary, not the full text.
        extract_summary = " ".join(extract_summary_sentences)
        abstract_summary = summarizer.abstractive_summary(extract_summary)

        display_extractive_summary(all_sentences, extract_summary_sentences)
        display_abstractive_summary(abstract_summary)