Update app.py
Browse files. Added Sshleifer-distilbart model as an option
app.py
CHANGED
@@ -50,7 +50,7 @@ def article_text_extractor(url: str):
|
|
50 |
|
51 |
for sentence in sentences:
|
52 |
if len(chunks) == current_chunk + 1:
|
53 |
-
if len(chunks[current_chunk]) + len(sentence.split(" ")) <=
|
54 |
chunks[current_chunk].extend(sentence.split(" "))
|
55 |
else:
|
56 |
current_chunk += 1
|
@@ -123,22 +123,37 @@ def summary_downloader(raw_text):
|
|
123 |
st.markdown(href,unsafe_allow_html=True)
|
124 |
|
125 |
@st.cache(allow_output_mutation=True)
|
126 |
-
def
|
127 |
|
128 |
summarizer = pipeline('summarization',model='facebook/bart-large-cnn')
|
129 |
return summarizer
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
#Streamlit App
|
132 |
|
133 |
-
st.title("Article Text and Link Extractive Summarizer
|
|
|
|
|
|
|
|
|
134 |
|
135 |
st.markdown(
|
136 |
-
"Model Source: [Facebook-Bart-large-CNN](https://huggingface.co/facebook/bart-large-cnn)"
|
137 |
)
|
138 |
|
139 |
st.markdown(
|
140 |
"""The app supports extractive summarization which aims to identify the salient information that is then extracted and grouped together to form a concise summary.
|
141 |
For documents or text that is more than 500 words long, the app will divide the text into chunks and summarize each chunk.
|
|
|
|
|
|
|
|
|
|
|
142 |
Please do note that the model will take longer to generate summaries for documents that are too long"""
|
143 |
)
|
144 |
|
@@ -190,18 +205,33 @@ summarize = st.button("Summarize")
|
|
190 |
|
191 |
# called on toggle button [summarize]
|
192 |
if summarize:
|
193 |
-
if
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
197 |
# extractive summarizer
|
198 |
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
# final summarized output
|
207 |
st.subheader("Summarized text")
|
|
|
50 |
|
51 |
for sentence in sentences:
|
52 |
if len(chunks) == current_chunk + 1:
|
53 |
+
if len(chunks[current_chunk]) + len(sentence.split(" ")) <= 600:
|
54 |
chunks[current_chunk].extend(sentence.split(" "))
|
55 |
else:
|
56 |
current_chunk += 1
|
|
|
123 |
st.markdown(href,unsafe_allow_html=True)
|
124 |
|
125 |
@st.cache(allow_output_mutation=True)
def facebook_model():
    """Build (once, then reuse via Streamlit's cache) the Facebook
    BART-large-CNN summarization pipeline.

    Returns:
        A transformers summarization pipeline backed by
        'facebook/bart-large-cnn'.
    """
    # allow_output_mutation=True: the pipeline object is mutated internally
    # on use, which would otherwise invalidate st.cache's hash check.
    return pipeline('summarization', model='facebook/bart-large-cnn')
|
130 |
|
131 |
+
@st.cache(allow_output_mutation=True)
def schleifer_model():
    """Build (once, then reuse via Streamlit's cache) the distilled
    DistilBART-CNN summarization pipeline.

    Returns:
        A transformers summarization pipeline backed by
        'sshleifer/distilbart-cnn-12-6'.
    """
    # allow_output_mutation=True: the pipeline object is mutated internally
    # on use, which would otherwise invalidate st.cache's hash check.
    return pipeline('summarization', model='sshleifer/distilbart-cnn-12-6')
|
136 |
+
|
137 |
#Streamlit App
# Top-level page layout: title, sidebar model picker, and intro copy.
# NOTE(review): statement order is render order in Streamlit — do not reorder.

st.title("Article Text and Link Extractive Summarizer 📝")

# Sidebar control whose value drives the model dispatch in the
# `if summarize:` handler further down; option labels must stay in sync
# with the comparisons there.
model_type = st.sidebar.selectbox(
    "Model type", options=["Facebook-Bart", "Sshleifer-DistilBart"]
)

st.markdown(
    "Model Source: [Facebook-Bart-large-CNN](https://huggingface.co/facebook/bart-large-cnn) and [Sshleifer-distilbart-cnn-12-6](https://huggingface.co/sshleifer/distilbart-cnn-12-6)"
)

st.markdown(
    """The app supports extractive summarization which aims to identify the salient information that is then extracted and grouped together to form a concise summary.
For documents or text that is more than 500 words long, the app will divide the text into chunks and summarize each chunk.
There are two models available to choose from:

- Facebook-Bart, trained on large CNN Daily Mail articles
- Sshleifer-Distilbart, which is a distilled version of the large Bart model

Please do note that the model will take longer to generate summaries for documents that are too long"""
)
|
159 |
|
|
|
205 |
|
206 |
# called on toggle button [summarize]
if summarize:
    # Map each sidebar option to its cached pipeline loader. This replaces
    # two near-identical if/elif branches and fixes a copy-paste bug where
    # the Sshleifer branch's spinner said "Loading Facebook-Bart Model".
    model_loaders = {
        "Facebook-Bart": facebook_model,
        "Sshleifer-DistilBart": schleifer_model,
    }

    if model_type in model_loaders:
        # URL input was pre-split into word chunks upstream; pasted text
        # is summarized as a single cleaned string.
        text_to_summarize = chunks if is_url else clean_text

        # extractive summarizer
        with st.spinner(
            text=f"Loading {model_type} Model and Extracting summary. This might take a few seconds depending on the length of your text..."
        ):
            summarizer_model = model_loaders[model_type]()
            summarized_text = summarizer_model(text_to_summarize, max_length=100, min_length=30)
            # The pipeline returns one dict per chunk; join the pieces
            # into a single summary string.
            summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])

    # final summarized output
    st.subheader("Summarized text")
|