doc-to-slides / app.py
com3dian's picture
Update app.py
40f5c17 verified
raw
history blame
1.82 kB
import os
import subprocess

import numpy as np
import pandas as pd
import streamlit as st
from grobid_client.grobid_client import GrobidClient
from grobidmonkey import reader
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return its path.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the file's bytes), such as the value
            returned by ``st.file_uploader``.

    Returns:
        The path of the saved file as a string, e.g. ``./uploads/paper.pdf``.

    Raises:
        ValueError: If the supplied name has no usable final path component.
    """
    # The name is chosen by the client; keep only its final path component so
    # a name such as "../app.py" cannot be written outside ./uploads.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    os.makedirs("./uploads", exist_ok=True)  # Create 'uploads' if missing
    file_path = os.path.join("./uploads", safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# Streamlit page: upload a PDF, run GROBID on it, then display the outline and
# section contents extracted from the resulting TEI XML.
st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')

# Wide column for the uploader, narrow column for the parser selection.
col1, col2 = st.columns([3, 1])
with col1:
    uploaded_file = st.file_uploader("Choose a file")
with col2:
    option = st.selectbox(
        'Select parsing method.',
        ('monkey', 'x2d', 'lxml'))

if uploaded_file is not None:
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")

    saved_file_path = save_uploaded_file(uploaded_file)
    os.makedirs("./outputs", exist_ok=True)

    # Run the GROBID client command line tool. This was previously passed to
    # the builtin exec(), which evaluates *Python source* and therefore raised
    # SyntaxError instead of launching the command. An argument list (no
    # shell) is used so nothing in the command is shell-interpreted.
    # NOTE(review): the whole ./uploads/ directory is submitted, so files from
    # earlier uploads are handed to the client again — confirm this is wanted.
    try:
        subprocess.run(
            [
                "grobid_client",
                "--input", "./uploads/",
                "--output", "./outputs/",
                "processFulltextDocument",
            ],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as err:
        st.error(f"GROBID processing failed: {err}")
        st.stop()

    # Alternative kept from the original: drive GROBID in-process.
    # client = GrobidClient(config_path="./grobidconfig.json")
    # client.process("processFulltextDocument",
    #                "./uploads/",
    #                consolidate_citations=True,
    #                tei_coordinates=True)

    # The command above writes its results to ./outputs/, so look for the TEI
    # file there rather than next to the uploaded PDF.
    pdf_stem = os.path.splitext(os.path.basename(saved_file_path))[0]
    output_file_path = os.path.join("./outputs", pdf_stem + ".grobid.tei.xml")
    if not os.path.exists(output_file_path):
        st.error(f"GROBID produced no TEI output at {output_file_path}")
        st.stop()

    monkeyReader = reader.MonkeyReader(option)

    # Each outline item unpacks to (prefix, fill, node); only the prefix and
    # the node's name are displayed.
    outline = monkeyReader.readOutline(output_file_path)
    for pre, _fill, node in outline:
        st.write(f"{pre}{node.name}")

    # read paper content
    essay = monkeyReader.readEssay(output_file_path)
    for key, values in essay.items():
        st.write(f"{key}: {', '.join(values)}")