Update app.py
Browse files. Added Sshleifer-distilbart model as an option
app.py
CHANGED
@@ -50,7 +50,7 @@ def article_text_extractor(url: str):
|
|
50 |
|
51 |
for sentence in sentences:
|
52 |
if len(chunks) == current_chunk + 1:
|
53 |
-
if len(chunks[current_chunk]) + len(sentence.split(" ")) <=
|
54 |
chunks[current_chunk].extend(sentence.split(" "))
|
55 |
else:
|
56 |
current_chunk += 1
|
@@ -123,22 +123,37 @@ def summary_downloader(raw_text):
|
|
123 |
st.markdown(href,unsafe_allow_html=True)
|
124 |
|
125 |
@st.cache(allow_output_mutation=True)
|
126 |
-
def
|
127 |
|
128 |
summarizer = pipeline('summarization',model='facebook/bart-large-cnn')
|
129 |
return summarizer
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
#Streamlit App
|
132 |
|
133 |
-
st.title("Article Text and Link Extractive Summarizer
|
|
|
|
|
|
|
|
|
134 |
|
135 |
st.markdown(
|
136 |
-
"Model Source: [Facebook-Bart-large-CNN](https://huggingface.co/facebook/bart-large-cnn)"
|
137 |
)
|
138 |
|
139 |
st.markdown(
|
140 |
"""The app supports extractive summarization which aims to identify the salient information that is then extracted and grouped together to form a concise summary.
|
141 |
For documents or text that is more than 500 words long, the app will divide the text into chunks and summarize each chunk.
|
|
|
|
|
|
|
|
|
|
|
142 |
Please do note that the model will take longer to generate summaries for documents that are too long"""
|
143 |
)
|
144 |
|
@@ -190,18 +205,33 @@ summarize = st.button("Summarize")
|
|
190 |
|
191 |
# called on toggle button [summarize]
|
192 |
if summarize:
|
193 |
-
if
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
197 |
# extractive summarizer
|
198 |
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
# final summarized output
|
207 |
st.subheader("Summarized text")
|
|
|
50 |
|
51 |
for sentence in sentences:
|
52 |
if len(chunks) == current_chunk + 1:
|
53 |
+
if len(chunks[current_chunk]) + len(sentence.split(" ")) <= 600:
|
54 |
chunks[current_chunk].extend(sentence.split(" "))
|
55 |
else:
|
56 |
current_chunk += 1
|
|
|
123 |
st.markdown(href,unsafe_allow_html=True)
|
124 |
|
125 |
@st.cache(allow_output_mutation=True)
def facebook_model():
    """Build (once, then reuse via Streamlit's cache) the Facebook
    BART-large-CNN summarization pipeline.

    Returns:
        A transformers summarization pipeline backed by
        'facebook/bart-large-cnn'.
    """
    # allow_output_mutation=True: the pipeline object is mutated internally
    # on use, which would otherwise invalidate st.cache's hash check.
    return pipeline('summarization', model='facebook/bart-large-cnn')
|
130 |
|
131 |
+
@st.cache(allow_output_mutation=True)
def schleifer_model():
    """Build (once, then reuse via Streamlit's cache) the distilled
    DistilBART-CNN summarization pipeline.

    Returns:
        A transformers summarization pipeline backed by
        'sshleifer/distilbart-cnn-12-6'.
    """
    # allow_output_mutation=True: the pipeline object is mutated internally
    # on use, which would otherwise invalidate st.cache's hash check.
    return pipeline('summarization', model='sshleifer/distilbart-cnn-12-6')
|
136 |
+
|
137 |
#Streamlit App
# Top-level page layout: title, sidebar model picker, and intro copy.
# NOTE(review): statement order is render order in Streamlit — do not reorder.

st.title("Article Text and Link Extractive Summarizer 📝")

# Sidebar control whose value drives the model dispatch in the
# `if summarize:` handler further down; option labels must stay in sync
# with the comparisons there.
model_type = st.sidebar.selectbox(
    "Model type", options=["Facebook-Bart", "Sshleifer-DistilBart"]
)

st.markdown(
    "Model Source: [Facebook-Bart-large-CNN](https://huggingface.co/facebook/bart-large-cnn) and [Sshleifer-distilbart-cnn-12-6](https://huggingface.co/sshleifer/distilbart-cnn-12-6)"
)

st.markdown(
    """The app supports extractive summarization which aims to identify the salient information that is then extracted and grouped together to form a concise summary.
For documents or text that is more than 500 words long, the app will divide the text into chunks and summarize each chunk.
There are two models available to choose from:

- Facebook-Bart, trained on large CNN Daily Mail articles
- Sshleifer-Distilbart, which is a distilled version of the large Bart model

Please do note that the model will take longer to generate summaries for documents that are too long"""
)
|
159 |
|
|
|
205 |
|
206 |
# called on toggle button [summarize]
if summarize:
    # Map each sidebar option to its cached pipeline loader. This replaces
    # two near-identical if/elif branches and fixes a copy-paste bug where
    # the Sshleifer branch's spinner said "Loading Facebook-Bart Model".
    model_loaders = {
        "Facebook-Bart": facebook_model,
        "Sshleifer-DistilBart": schleifer_model,
    }

    if model_type in model_loaders:
        # URL input was pre-split into word chunks upstream; pasted text
        # is summarized as a single cleaned string.
        text_to_summarize = chunks if is_url else clean_text

        # extractive summarizer
        with st.spinner(
            text=f"Loading {model_type} Model and Extracting summary. This might take a few seconds depending on the length of your text..."
        ):
            summarizer_model = model_loaders[model_type]()
            summarized_text = summarizer_model(text_to_summarize, max_length=100, min_length=30)
            # The pipeline returns one dict per chunk; join the pieces
            # into a single summary string.
            summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])

    # final summarized output
    st.subheader("Summarized text")
|