nickmuchi commited on
Commit
33ebb15
·
1 Parent(s): e18707d

Update app.py

Browse files

Added Sshleifer-distilbart model as an option

Files changed (1) hide show
  1. app.py +44 -14
app.py CHANGED
@@ -50,7 +50,7 @@ def article_text_extractor(url: str):
50
 
51
  for sentence in sentences:
52
  if len(chunks) == current_chunk + 1:
53
- if len(chunks[current_chunk]) + len(sentence.split(" ")) <= 500:
54
  chunks[current_chunk].extend(sentence.split(" "))
55
  else:
56
  current_chunk += 1
@@ -123,22 +123,37 @@ def summary_downloader(raw_text):
123
  st.markdown(href,unsafe_allow_html=True)
124
 
125
  @st.cache(allow_output_mutation=True)
126
- def pipeline_model():
127
 
128
  summarizer = pipeline('summarization',model='facebook/bart-large-cnn')
129
  return summarizer
130
 
 
 
 
 
 
 
131
  #Streamlit App
132
 
133
- st.title("Article Text and Link Extractive Summarizer using Facebook-Bart-large-CNN Transformer Model 📝")
 
 
 
 
134
 
135
  st.markdown(
136
- "Model Source: [Facebook-Bart-large-CNN](https://huggingface.co/facebook/bart-large-cnn)"
137
  )
138
 
139
  st.markdown(
140
  """The app supports extractive summarization which aims to identify the salient information that is then extracted and grouped together to form a concise summary.
141
  For documents or text that is more than 500 words long, the app will divide the text into chunks and summarize each chunk.
 
 
 
 
 
142
  Please do note that the model will take longer to generate summaries for documents that are too long"""
143
  )
144
 
@@ -190,18 +205,33 @@ summarize = st.button("Summarize")
190
 
191
  # called on toggle button [summarize]
192
  if summarize:
193
- if is_url:
194
- text_to_summarize = chunks
195
- else:
196
- text_to_summarize = clean_text
 
197
  # extractive summarizer
198
 
199
- with st.spinner(
200
- text="Loading Model and Extracting summary. This might take a few seconds depending on the length of your document/text..."
201
- ):
202
- summarizer_model = pipeline_model()
203
- summarized_text = summarizer_model(text_to_summarize, max_length=100, min_length=30)
204
- summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  # final summarized output
207
  st.subheader("Summarized text")
 
50
 
51
  for sentence in sentences:
52
  if len(chunks) == current_chunk + 1:
53
+ if len(chunks[current_chunk]) + len(sentence.split(" ")) <= 600:
54
  chunks[current_chunk].extend(sentence.split(" "))
55
  else:
56
  current_chunk += 1
 
123
  st.markdown(href,unsafe_allow_html=True)
124
 
125
@st.cache(allow_output_mutation=True)
def facebook_model():
    """Build the Facebook BART-large-CNN summarization pipeline.

    Streamlit caches the returned pipeline so the model weights are
    downloaded and instantiated only once per session;
    ``allow_output_mutation=True`` lets the mutable pipeline object be
    reused from the cache without hash checks.
    """
    return pipeline('summarization', model='facebook/bart-large-cnn')
130
 
131
@st.cache(allow_output_mutation=True)
def schleifer_model():
    """Build the sshleifer/distilbart-cnn-12-6 summarization pipeline.

    DistilBART is a distilled (smaller, faster) variant of BART-large-CNN.
    Cached by Streamlit so the model loads only once per session.
    NOTE(review): the function name keeps the existing 'schleifer' spelling
    because the app calls it by this name; the model author's handle is
    actually 'sshleifer'.
    """
    return pipeline('summarization', model='sshleifer/distilbart-cnn-12-6')
136
+
137
#Streamlit App

# Page title and sidebar model picker. The option labels below are matched
# exactly against model_type in the summarize handler further down.
st.title("Article Text and Link Extractive Summarizer 📝")

model_type = st.sidebar.selectbox(
    "Model type", options=["Facebook-Bart", "Sshleifer-DistilBart"]
)

st.markdown(
    "Model Source: [Facebook-Bart-large-CNN](https://huggingface.co/facebook/bart-large-cnn) and [Sshleifer-distilbart-cnn-12-6](https://huggingface.co/sshleifer/distilbart-cnn-12-6)"
)

# FIX: the chunking loop caps each chunk at 600 words, but this help text
# still said "500 words" — updated so the UI matches the code.
st.markdown(
    """The app supports extractive summarization which aims to identify the salient information that is then extracted and grouped together to form a concise summary.
    For documents or text that is more than 600 words long, the app will divide the text into chunks and summarize each chunk.
    There are two models available to choose from:

    - Facebook-Bart, trained on large CNN Daily Mail articles
    - Sshleifer-Distilbart, which is a distilled version of the large Bart model

    Please do note that the model will take longer to generate summaries for documents that are too long"""
)
159
 
 
205
 
206
# called on toggle button [summarize]
if summarize:
    # Both models consume the same input, so select it once instead of
    # duplicating this logic per model branch (as the previous version did):
    # pre-chunked text for URLs, cleaned pasted text otherwise.
    if is_url:
        text_to_summarize = chunks
    else:
        text_to_summarize = clean_text

    # extractive summarizer
    # FIX: the DistilBart branch previously showed a "Loading Facebook-Bart
    # Model" spinner message (copy-paste error); interpolating model_type
    # keeps the message correct for whichever model is selected.
    with st.spinner(
        text=f"Loading {model_type} Model and Extracting summary. This might take a few seconds depending on the length of your text..."
    ):
        if model_type == "Facebook-Bart":
            summarizer_model = facebook_model()
        else:  # "Sshleifer-DistilBart"
            summarizer_model = schleifer_model()
        summarized_text = summarizer_model(text_to_summarize, max_length=100, min_length=30)
        # Each chunk yields one summary dict; join them into a single string.
        summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
235
 
236
  # final summarized output
237
  st.subheader("Summarized text")