Spaces:
Sleeping
Sleeping
File size: 3,304 Bytes
c0ae847 03ddbfd 97f7d3e 46193fd 20d6d68 03ddbfd 20d6d68 97f7d3e 20d6d68 03ddbfd 20d6d68 5ea550e 20d6d68 b87bcef 20d6d68 2a61e91 97f7d3e 2a61e91 03ddbfd 20d6d68 b87bcef 20d6d68 2a61e91 b87bcef 6f823ae f497f7f 2a61e91 6f823ae 20d6d68 97f7d3e 20d6d68 b87bcef 20d6d68 6f823ae 97f7d3e 20d6d68 b87bcef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import streamlit as st
import PyPDF2
from extractive_model import summarize_with_textrank
from nltk.tokenize import sent_tokenize
# Use Streamlit's wide page layout; the UI built in main() is split into
# three columns.
# NOTE(review): Streamlit documents set_page_config as a call that should come
# before other Streamlit commands -- keep it above main(); confirm for the
# installed version.
st.set_page_config(layout="wide")
# Function to handle file upload and return its content
def load_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Anything ``PyPDF2.PdfReader`` accepts; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order with no separator.
        A page whose ``extract_text()`` result is falsy contributes an
        empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Iterate the pages directly and join once instead of indexing by
    # position and growing a string with ``+=``.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to calculate overlap
def calculate_overlap(original_text, summary_text):
    """Return the share of original sentences that also occur in the summary.

    Both texts are split with ``sent_tokenize`` and de-duplicated. The number
    of sentences common to both sets is divided by the number of distinct
    original sentences and scaled to a percentage.

    Returns:
        The percentage as a float, or ``0`` when the original text yields no
        sentences.
    """
    source_sentences = set(sent_tokenize(original_text))
    digest_sentences = set(sent_tokenize(summary_text))

    # Guard against dividing by zero for an empty original.
    if not source_sentences:
        return 0

    shared = digest_sentences & source_sentences
    return (len(shared) / len(source_sentences)) * 100
# Main app
def main():
    """Render the Terms of Service Summarizer page.

    The page has three columns: a summarizer-type selector, the input
    widgets (text area, PDF uploader and the "Summarize" button) and the
    result panel.

    When "Summarize" is pressed with exactly one input source, the text is
    summarized and the summary and its overlap score are stored in
    ``st.session_state`` so the result panel can show them. Only the
    "Extractive" option produces a summary; "Abstractive" is not implemented.
    If both or neither input is supplied, a warning is shown and the function
    returns before the result panel is drawn.
    """
    st.title("Terms of Service Summarizer")

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: radio buttons for summarizer choice
    with col1:
        radio_options = ['Abstractive', 'Extractive']
        radio_selection = st.radio("Choose type of summarizer:", radio_options)

    # Middle column: text input, file uploader and the trigger button
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        if st.button("Summarize"):
            # Validate first so file_content is always assigned before use.
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return
            if not uploaded_file and not user_input:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            if uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            else:
                file_content = user_input

            if radio_selection == "Extractive":
                summary = summarize_with_textrank(file_content)
                st.session_state.summary = summary
                # The overlap is computed here, right after summarization,
                # from the same text that produced the summary.
                st.session_state.overlap = calculate_overlap(file_content, summary)
            # "Abstractive" has no implementation yet: pressing the button
            # with that option leaves st.session_state untouched.

    # Right column: displaying text after pressing 'Summarize'
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)
        if radio_selection == "Extractive" and 'overlap' in st.session_state:
            st.write(f"Overlap with Original Text: {st.session_state.overlap:.2f}%")
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|