Spaces:

NBayer
/

Streamlit_app_paper

Runtime error

File size: 2,156 Bytes

c42ad4e

import streamlit as st
from streamlit.components.v1 import html
import os
import PyPDF2

def get_pdf_text(pdf_path):
    """Extract the text of every page of a PDF and join it into one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, in page order, joined by single
        spaces.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # The context manager closes the file even when PyPDF2 raises while
    # parsing or extracting, which the manual open()/close() pair did not.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Defensive `or ""`: keeps the join below safe should a page yield
        # no text object at all.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    return " ".join(page_texts)

# Build the two top-level tabs of the app. Both handles are used as context
# managers below to place widgets inside the corresponding tab.
tab_labels = ["Research topics", "Summarize your paper(s)"]
tab_general_topics, tab_your_paper = st.tabs(tab_labels)

with tab_general_topics:
    # Empty HTML component, 10 px high, rendered ahead of the header
    # (presumably used as vertical spacing).
    html("", height=10)

    st.header(
        "See the status of a research topic through a summary of the most cited papers"
    )

    # Only the widget is rendered; its return value is discarded here.
    research_topics = ["Artificial Intelligence", "Sustainability", "Cooking"]
    st.selectbox("Select a research topic", research_topics)

with tab_your_paper:
    # Empty HTML component, 10 px high, rendered ahead of the intro text
    # (presumably used as vertical spacing).
    html("", height=10)

    st.markdown("""
### Simply upload one or multiple PDFs and we summarize the content for you!
    """)

    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )
    if pdf_files:
        # The scratch directory is not guaranteed to exist on a fresh
        # deployment; without it the open() below fails.
        os.makedirs("pdfs", exist_ok=True)

        pdfs_content_list = []
        for pdf in pdf_files:
            # The upload name comes from the client: keep only its final path
            # component so the temporary file always lands inside "pdfs".
            pdf_path = os.path.join("pdfs", os.path.basename(pdf.name))

            # get_pdf_text() works on a path, so persist the upload first.
            with open(pdf_path, "wb") as f:
                f.write(pdf.getvalue())

            # Read and delete each file right away. This also handles two
            # uploads sharing a name, and cleans up when extraction fails.
            try:
                pdfs_content_list.append(get_pdf_text(pdf_path))
            finally:
                os.remove(pdf_path)

        # Works for any number of uploads; the previous debug output indexed
        # a second element unconditionally and crashed on a single PDF.
        all_text_together = " ".join(pdfs_content_list)

        st.write(all_text_together)