"""Paper2Slides Streamlit app.

Lets the user upload a paper, stores it under ``./uploads``, runs GROBID's
full-text extraction over that directory, then renders the outline parsed
from the resulting TEI XML file.
"""

import glob
import os

import numpy as np
import pandas as pd
import streamlit as st
from grobid_client.grobid_client import GrobidClient
from grobidmonkey import reader


def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return the saved path.

    Args:
        uploaded_file: Object exposing ``.name`` and ``.getbuffer()`` (as
            returned by ``st.file_uploader``).

    Returns:
        The path of the written file, as a string.
    """
    # The name is supplied by the client; keep only its final component so a
    # crafted value such as "../../x" cannot escape the uploads directory.
    safe_name = os.path.basename(uploaded_file.name)
    os.makedirs("./uploads", exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join("./uploads", safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string


def _find_tei_outputs(pdf_name, directory="."):
    """Return TEI XML files under *directory* that correspond to *pdf_name*.

    Several candidate file names are tried because the output naming depends
    on the GROBID client version (NOTE(review): presumably ``<stem>.tei.xml``
    or ``<stem>.grobid.tei.xml`` -- confirm against the installed client).
    The original ``<name>.tei.xml`` pattern is kept as the first candidate.

    Args:
        pdf_name: File name (no directory part) of the uploaded paper.
        directory: Root of the recursive search.

    Returns:
        A list of matching paths, without duplicates, in candidate order.
    """
    stem = os.path.splitext(pdf_name)[0]
    candidates = (
        pdf_name + ".tei.xml",
        stem + ".tei.xml",
        stem + ".grobid.tei.xml",
    )
    found = []
    for candidate in candidates:
        # glob.escape keeps characters like "[" or "*" in the file name from
        # being interpreted as wildcards.
        pattern = f"{glob.escape(directory)}/**/{glob.escape(candidate)}"
        for path in glob.glob(pattern, recursive=True):
            if path not in found:
                found.append(path)
    return found


st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')
uploaded_file = st.file_uploader("Choose a file")

if uploaded_file is not None:
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")

    saved_file_path = save_uploaded_file(uploaded_file)

    os.makedirs("grobidoutputs", exist_ok=True)
    client = GrobidClient(config_path="./grobidconfig.json")
    client.process("processFulltextDocument", "./uploads/", n=20)

    # Search using the name the file was actually saved under.
    matching_files = _find_tei_outputs(os.path.basename(saved_file_path))

    if matching_files:
        st.write("Found matching file(s):")
        for tei_path in matching_files:
            st.write(tei_path)

        # readOutline is given one path, not the whole list of matches.
        output_file_path = matching_files[0]
        monkeyReader = reader.MonkeyReader('x2d')
        outline = monkeyReader.readOutline(output_file_path)
        for pre, fill, node in outline:
            st.write(f"{pre}{node.name}")

        # read paper content
        # NOTE(review): this passes the uploaded PDF path, as the original
        # did; confirm whether readEssay expects the TEI XML path instead.
        essay = monkeyReader.readEssay(saved_file_path)
    else:
        # Nothing to parse; do not fall through to the reader.
        st.write("No matching file found.")