Spaces:
Sleeping
Sleeping
EmreYY20
committed on
Commit
·
5f89cc0
1
Parent(s):
586efa7
add hybrid summarization
Browse files- app.py +20 -15
- hybrid_summarization.py +6 -0
app.py
CHANGED
|
@@ -1,26 +1,26 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from rouge import Rouge
|
| 4 |
from datasets import load_dataset
|
| 5 |
-
import PyPDF2
|
| 6 |
from extractive_summarization import summarize_with_textrank, summarize_with_lsa
|
| 7 |
from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
|
| 8 |
from keyword_extraction import extract_keywords
|
| 9 |
from keyphrase_extraction import extract_sentences_with_obligations
|
| 10 |
-
from
|
| 11 |
-
import matplotlib.pyplot as plt
|
| 12 |
-
from PIL import Image
|
| 13 |
-
import io
|
| 14 |
-
#from blanc import BlancHelp
|
| 15 |
|
| 16 |
-
|
| 17 |
-
# Load in ToS
|
| 18 |
dataset = load_dataset("EE21/ToS-Summaries")
|
| 19 |
|
| 20 |
# Extract titles or identifiers for the ToS
|
| 21 |
tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
|
| 22 |
-
|
| 23 |
-
|
| 24 |
# Set page to wide mode
|
| 25 |
st.set_page_config(layout="wide")
|
| 26 |
|
|
@@ -41,7 +41,7 @@ def main():
|
|
| 41 |
|
| 42 |
# Left column: Radio buttons for summarizer choice
|
| 43 |
with col1:
|
| 44 |
-
radio_options = ["Abstractive (LongT5)", "Abstractive (LED)", 'Abstractive (BART Fine-tuned)', "Abstractive (BART-large-CNN)", 'Extractive (TextRank)',
|
| 45 |
"Extractive (Latent Semantic Analysis)", 'Keyphrase Extraction (RAKE)', 'Keyword Extraction (RAKE)']
|
| 46 |
|
| 47 |
help_text = "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. " \
|
|
@@ -77,6 +77,11 @@ def main():
|
|
| 77 |
st.warning("Please upload a PDF, enter some text, or select a document to summarize.")
|
| 78 |
return
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
# Perform extractive summarization
|
| 81 |
if radio_selection == "Extractive (TextRank)":
|
| 82 |
summary = summarize_with_textrank(file_content)
|
|
@@ -87,22 +92,22 @@ def main():
|
|
| 87 |
summary = summarize_with_lsa(file_content)
|
| 88 |
st.session_state.summary = summary
|
| 89 |
|
| 90 |
-
# Perform
|
| 91 |
if radio_selection == "Abstractive (BART Fine-tuned)":
|
| 92 |
summary = summarize_with_bart_ft(file_content)
|
| 93 |
st.session_state.summary = summary
|
| 94 |
|
| 95 |
-
# Perform
|
| 96 |
if radio_selection == "Abstractive (BART-large-CNN)":
|
| 97 |
summary = summarize_with_bart_cnn(file_content)
|
| 98 |
st.session_state.summary = summary
|
| 99 |
|
| 100 |
-
# Perform
|
| 101 |
if radio_selection == "Abstractive (LongT5)":
|
| 102 |
summary = summarize_with_t5(file_content)
|
| 103 |
st.session_state.summary = summary
|
| 104 |
|
| 105 |
-
# Perform
|
| 106 |
if radio_selection == "Abstractive (LED)":
|
| 107 |
summary = summarize_with_led(file_content)
|
| 108 |
st.session_state.summary = summary
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import re
|
| 3 |
+
import PyPDF2
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import io
|
| 6 |
+
from wordcloud import WordCloud
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
from rouge import Rouge
|
| 10 |
from datasets import load_dataset
|
|
|
|
| 11 |
from extractive_summarization import summarize_with_textrank, summarize_with_lsa
|
| 12 |
from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
|
| 13 |
from keyword_extraction import extract_keywords
|
| 14 |
from keyphrase_extraction import extract_sentences_with_obligations
|
| 15 |
+
from hybrid_summarization import summarize_hybrid
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
#-------------------------------------------------------------------#
|
| 18 |
+
# Load in ToS-Summaries dataset
|
| 19 |
dataset = load_dataset("EE21/ToS-Summaries")
|
| 20 |
|
| 21 |
# Extract titles or identifiers for the ToS
|
| 22 |
tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
|
| 23 |
+
|
|
|
|
| 24 |
# Set page to wide mode
|
| 25 |
st.set_page_config(layout="wide")
|
| 26 |
|
|
|
|
| 41 |
|
| 42 |
# Left column: Radio buttons for summarizer choice
|
| 43 |
with col1:
|
| 44 |
+
radio_options = ["Hybrid (RAKE + BART Fine-tuned)", "Abstractive (LongT5)", "Abstractive (LED)", 'Abstractive (BART Fine-tuned)', "Abstractive (BART-large-CNN)", 'Extractive (TextRank)',
|
| 45 |
"Extractive (Latent Semantic Analysis)", 'Keyphrase Extraction (RAKE)', 'Keyword Extraction (RAKE)']
|
| 46 |
|
| 47 |
help_text = "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. " \
|
|
|
|
| 77 |
st.warning("Please upload a PDF, enter some text, or select a document to summarize.")
|
| 78 |
return
|
| 79 |
|
| 80 |
+
# Perform hybrid summarization
|
| 81 |
+
if radio_selection == "Hybrid (RAKE + BART Fine-tuned)":
|
| 82 |
+
summary = summarize_hybrid(file_content)
|
| 83 |
+
st.session_state.summary = summary
|
| 84 |
+
|
| 85 |
# Perform extractive summarization
|
| 86 |
if radio_selection == "Extractive (TextRank)":
|
| 87 |
summary = summarize_with_textrank(file_content)
|
|
|
|
| 92 |
summary = summarize_with_lsa(file_content)
|
| 93 |
st.session_state.summary = summary
|
| 94 |
|
| 95 |
+
# Perform abstractive summarization
|
| 96 |
if radio_selection == "Abstractive (BART Fine-tuned)":
|
| 97 |
summary = summarize_with_bart_ft(file_content)
|
| 98 |
st.session_state.summary = summary
|
| 99 |
|
| 100 |
+
# Perform abstractive summarization
|
| 101 |
if radio_selection == "Abstractive (BART-large-CNN)":
|
| 102 |
summary = summarize_with_bart_cnn(file_content)
|
| 103 |
st.session_state.summary = summary
|
| 104 |
|
| 105 |
+
# Perform abstractive summarization
|
| 106 |
if radio_selection == "Abstractive (LongT5)":
|
| 107 |
summary = summarize_with_t5(file_content)
|
| 108 |
st.session_state.summary = summary
|
| 109 |
|
| 110 |
+
# Perform abstractive summarization
|
| 111 |
if radio_selection == "Abstractive (LED)":
|
| 112 |
summary = summarize_with_led(file_content)
|
| 113 |
st.session_state.summary = summary
|
hybrid_summarization.py
CHANGED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keyphrase_extraction import extract_sentences_with_obligations
|
| 2 |
+
from abstractive_summarization import summarize_with_bart_ft
|
| 3 |
+
|
| 4 |
+
def summarize_hybrid(text):
    """Summarize *text* with a two-stage extract-then-abstract pipeline.

    The input is passed to ``extract_sentences_with_obligations`` and that
    call's result is forwarded, unmodified, to ``summarize_with_bart_ft``.

    Args:
        text: The document content to summarize.

    Returns:
        Whatever ``summarize_with_bart_ft`` returns for the extracted content.
    """
    # Feed the extraction output straight into the fine-tuned BART summarizer.
    return summarize_with_bart_ft(extract_sentences_with_obligations(text))
|