Spaces:
Sleeping
Sleeping
File size: 1,628 Bytes
ea5c59c 8d4620d 9ec3b13 395fe09 ea5c59c 8d4620d 28c51ee 8d4620d ea5c59c 8d4620d ea5c59c 1ed0b9b ea5c59c c1816fd ea5c59c c1816fd 8d4620d 395fe09 e5ef7f9 395fe09 28c51ee b343a4b b10c1c5 c1816fd b10c1c5 fda22ce 51b9227 fda22ce 743e4de 17f90e8 51b9227 17f90e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import streamlit as st
import pandas as pd
import numpy as np
import os
from grobidmonkey import reader
from grobid_client.grobid_client import GrobidClient
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return its path.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getbuffer()`` (the file's bytes), such as the value
            returned by ``st.file_uploader``.

    Returns:
        The path of the written file, as a string.

    Raises:
        ValueError: If ``uploaded_file.name`` has no usable file-name
            component (empty, ``.`` or ``..``).
    """
    upload_dir = "./uploads"
    # The name comes from the client: keep only its last path component so a
    # value such as "../../x" cannot place the file outside the upload dir.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string
# Streamlit page: upload a paper, run GROBID on it, then display the parsed
# outline and section text.
# NOTE(review): the original indentation was lost; everything after the
# upload check is assumed to run only when a file has been uploaded.
st.title('Paper2Slides')
st.subheader('Upload paper in pdf format')

uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # Echo the upload's name and size back to the user.
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")

    option = st.selectbox(
        'Please select one of the parsing method.',
        ('monkey', 'x2d', 'lxml'))

    saved_file_path = save_uploaded_file(uploaded_file)

    # Run GROBID full-text extraction over the uploads directory.
    client = GrobidClient(config_path="./grobidconfig.json")
    client.process("processFulltextDocument",
                   "./uploads/",
                   consolidate_citations=True,
                   tei_coordinates=True)

    # Presumably GROBID writes "<stem>.grobid.tei.xml" next to the input
    # file -- TODO confirm against the grobid_client version in use.
    output_file_path = os.path.splitext(saved_file_path)[0] + ".grobid.tei.xml"

    if not os.path.exists(output_file_path):
        # Without this check a failed GROBID run surfaces as an opaque
        # file/parse error from the reader below.
        st.error(
            f"GROBID did not produce {output_file_path}. "
            "Check that the GROBID server configured in ./grobidconfig.json "
            "is reachable and that the upload is a valid PDF."
        )
    else:
        monkeyReader = reader.MonkeyReader(option)

        # Show the document outline, one node per line.
        outline = monkeyReader.readOutline(output_file_path)
        for pre, _fill, node in outline:
            st.write(f"{pre}{node.name}")

        # read paper content
        essay = monkeyReader.readEssay(output_file_path)
        for key, values in essay.items():
            st.write(f"{key}: {', '.join(values)}")
|