Spaces:
Runtime error
Runtime error
File size: 2,156 Bytes
c42ad4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import streamlit as st
from streamlit.components.v1 import html
import os
import PyPDF2
def get_pdf_text(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, in page order, joined by single
        spaces. Pages without extractable text contribute an empty string.

    Raises:
        OSError: If the file at ``pdf_path`` cannot be opened.
    """
    # The context manager closes the file even when parsing raises, which the
    # previous manual open()/close() pair did not guarantee.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file is still open because the
        # reader pulls page data from the stream on demand.
        # `or ""` is defensive: it keeps the join below safe should a page
        # yield no text object at all.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return " ".join(page_texts)
# Page layout: one tab for browsing predefined research topics, one tab for
# uploading PDFs whose extracted text is shown on the page.
tab_general_topics, tab_your_paper = st.tabs(["Research topics", "Summarize your paper(s)"])

with tab_general_topics:
    # Empty HTML component of height 10 (presumably a vertical spacer).
    html("", height=10)
    st.header("See the status of a research topic through a summary of the most cited papers")
    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])

with tab_your_paper:
    html("", height=10)
    st.markdown("""
### Simply upload one or multiple PDFs and we summarize the content for you!
""")
    pdf_files = st.file_uploader("Upload your paper as a pdf", type=[".pdf"], accept_multiple_files=True, help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.")
    if pdf_files:
        # The scratch directory is not guaranteed to exist (e.g. on a fresh
        # deployment), so create it before writing into it.
        os.makedirs("pdfs", exist_ok=True)

        pdfs_content_list = []
        for pdf in pdf_files:
            # The file name is supplied by the client; basename() strips any
            # directory components so the write stays inside "pdfs".
            pdf_path = os.path.join("pdfs", os.path.basename(pdf.name))

            # get_pdf_text() works on a path, so persist the upload first.
            with open(pdf_path, "wb") as f:
                f.write(pdf.getvalue())

            # Each file is saved, read and deleted before the next one is
            # handled, so two uploads sharing a name cannot clobber each
            # other, and the scratch file is removed even if parsing fails.
            try:
                pdfs_content_list.append(get_pdf_text(pdf_path))
            finally:
                os.remove(pdf_path)

        # Works for any number of uploads, including a single file (the
        # previous debug output indexed element 1 unconditionally and raised
        # IndexError when only one PDF was uploaded).
        all_text_together = " ".join(pdfs_content_list)
        st.write(all_text_together)