Spaces:
Runtime error
Runtime error
Upload main.py
Browse files
main.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from streamlit.components.v1 import html
|
| 3 |
+
import os
|
| 4 |
+
import PyPDF2
|
| 5 |
+
|
| 6 |
+
def get_pdf_text(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, space-joined.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        A single string made of each page's extracted text, joined with
        one space between pages. A PDF with no pages yields "".

    Raises:
        OSError: If the file cannot be opened.
        Any exception raised by ``PyPDF2.PdfReader`` or ``extract_text``
        propagates unchanged.
    """
    # The context manager closes the file even when parsing raises; the
    # previous manual close() was skipped on any exception.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)

        # Extraction must happen while the file is still open, hence inside
        # the `with`. `or ""` keeps the join safe should a page yield None
        # instead of a string.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    return " ".join(page_texts)
|
| 24 |
+
|
| 25 |
+
# Page layout: one tab for browsing research topics, one for uploading PDFs
# whose extracted text is written back to the page.
tab_general_topics, tab_your_paper = st.tabs(["Research topics", "Summarize your paper(s)"])

with tab_general_topics:
    # NOTE(review): empty HTML component, presumably used as a vertical spacer.
    html("", height=10)

    st.header("See the status of a research topic through a summary of the most cited papers")

    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])

with tab_your_paper:
    html("", height=10)

    st.markdown("""
    ### Simply upload one or multiple PDFs and we summarize the content for you!
    """)

    pdf_files = st.file_uploader("Upload your paper as a pdf", type=[".pdf"], accept_multiple_files=True, help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.")
    if pdf_files:
        # The scratch directory may not exist on a fresh deployment; without
        # this the open(..., "wb") below fails with FileNotFoundError.
        os.makedirs("pdfs", exist_ok=True)

        recently_added = []
        for index, pdf in enumerate(pdf_files):
            # Saving the files
            pdf_data = pdf.getvalue()
            # basename() drops any directory part of the client-supplied name
            # so the file cannot be written outside "pdfs"; the index prefix
            # keeps two uploads with the same name from overwriting each other.
            safe_name = f"{index}_{os.path.basename(pdf.name)}"
            pdf_path = os.path.join("pdfs", safe_name)
            with open(pdf_path, "wb") as f:
                f.write(pdf_data)
            recently_added.append(pdf_path)

        pdfs_content_list = []
        print("*****", recently_added)
        for recent_pdf in recently_added:
            try:
                # Reading the pdf files
                pdf_content = get_pdf_text(recent_pdf)
            finally:
                # Delete the file even when text extraction fails.
                os.remove(recent_pdf)
            print("**", pdf_content)
            pdfs_content_list.append(pdf_content)

        print("************************", len(pdfs_content_list))
        # Preview the first 20 characters of every document. The previous
        # code indexed [0] and [1] directly and raised IndexError whenever a
        # single PDF was uploaded.
        print(*(content[:20] for content in pdfs_content_list))
        all_text_together = " ".join(pdfs_content_list)

        st.write(all_text_together)
|