# doc-to-slides / app.py
# com3dian's picture
# fix path
# 28c51ee
# raw
# history blame
# 1.68 kB
import streamlit as st
import pandas as pd
import numpy as np
import os
import glob
from grobidmonkey import reader
from grobid_client.grobid_client import GrobidClient
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return its path.

    Args:
        uploaded_file: Object exposing a ``name`` string and a
            ``getbuffer()`` method returning the file's bytes (the script
            passes the value returned by ``st.file_uploader``).

    Returns:
        The path of the saved file as a string.

    Raises:
        ValueError: If the supplied name has no usable file-name component.
    """
    upload_dir = "./uploads"

    # The name comes from the client, so keep only its final path component;
    # otherwise something like "../app.py" would be written outside upload_dir.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# ---- Page layout: title and upload widget ----
st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')

uploaded_file = st.file_uploader("Choose a file")

# Every step below needs the uploaded file (its name and its saved path),
# so the whole pipeline lives inside this guard.
if uploaded_file is not None:
    # Echo basic information about the upload.
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")

    saved_file_path = save_uploaded_file(uploaded_file)

    # Run GROBID full-text processing over the uploads directory.
    # NOTE(review): "grobidoutputs" is created but not passed to
    # client.process, so it is not used by the call below as written.
    os.makedirs("grobidoutputs", exist_ok=True)
    client = GrobidClient(config_path="./grobidconfig.json")
    client.process("processFulltextDocument",
                   "./uploads/",
                   n=20)

    # Look anywhere under the working directory for the TEI file that
    # corresponds to this upload. The file name is escaped so characters
    # such as "[" or "*" in it are matched literally.
    # NOTE(review): this expects "<uploaded name>.tei.xml"; confirm that
    # matches the naming used by the installed grobid client.
    directory = "."
    pattern = glob.escape(os.path.basename(saved_file_path)) + ".tei.xml"
    matching_files = glob.glob(f"{directory}/**/{pattern}", recursive=True)

    if matching_files:
        st.write("Found matching file(s):")
        for file in matching_files:
            st.write(file)

        # glob returns a list; hand the reader one path, not the list.
        output_file_path = matching_files[0]
        monkeyReader = reader.MonkeyReader('x2d')
        outline = monkeyReader.readOutline(output_file_path)
        for pre, _fill, node in outline:
            st.write(f"{pre}{node.name}")

        # read paper content
        # NOTE(review): this passes the saved upload (the PDF), not the TEI
        # file found above — confirm which one readEssay expects.
        essay = monkeyReader.readEssay(saved_file_path)
    else:
        # Nothing to parse; report it and skip the reader steps.
        st.write("No matching file found.")