Spaces:

ml6team
/

doc-to-slides

Sleeping

App Files Files Community

doc-to-slides / app.py

com3dian

Update app.py

40f5c17 verified over 1 year ago

raw

history blame

1.82 kB

	import os
	import subprocess

	import numpy as np
	import pandas as pd
	import streamlit as st
	from grobid_client.grobid_client import GrobidClient
	from grobidmonkey import reader

	def save_uploaded_file(uploaded_file):
	    """Write an uploaded file into ``./uploads`` and return its path.

	    Args:
	        uploaded_file: Object exposing ``name`` (the client-supplied file
	            name) and ``getbuffer()`` (the file's bytes), as returned by
	            ``st.file_uploader``.

	    Returns:
	        The path of the saved file, as a string.

	    Raises:
	        ValueError: If ``uploaded_file.name`` has no usable file component.
	    """
	    upload_dir = "./uploads"

	    # The name comes from the client, so keep only its final component;
	    # otherwise "../x.pdf" (or a Windows-style "..\\x.pdf") could be written
	    # outside the uploads directory.
	    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
	    if safe_name in ("", ".", ".."):
	        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

	    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' if missing
	    file_path = os.path.join(upload_dir, safe_name)
	    with open(file_path, "wb") as f:
	        f.write(uploaded_file.getbuffer())
	    return file_path

	# Streamlit page: upload a PDF, run GROBID on it, then show the outline and
	# the section contents extracted from the resulting TEI XML.
	st.title('Paper2Slides')

	st.subheader('Upload paper in pdf format')

	col1, col2 = st.columns([3, 1])
	with col1:
	    uploaded_file = st.file_uploader("Choose a file")
	with col2:
	    option = st.selectbox(
	        'Select parsing method.',
	        ('monkey', 'x2d', 'lxml'))

	if uploaded_file is not None:
	    st.write(uploaded_file.name)
	    bytes_data = uploaded_file.getvalue()
	    st.write(len(bytes_data), "bytes")

	    saved_file_path = save_uploaded_file(uploaded_file)
	    os.makedirs("./outputs", exist_ok=True)

	    # Run the GROBID command-line client over the uploads directory.
	    # This was previously passed to exec(), which interprets its argument as
	    # Python source and therefore raised SyntaxError instead of running it.
	    # An argument list (no shell) keeps the invocation free of quoting issues.
	    # NOTE(review): the GrobidClient class imported at file level could be
	    # used instead of the CLI if a client config file is available.
	    try:
	        subprocess.run(
	            [
	                "grobid_client",
	                "--input", "./uploads/",
	                "--output", "./outputs/",
	                "processFulltextDocument",
	            ],
	            check=True,
	            capture_output=True,
	            text=True,
	        )
	    except FileNotFoundError:
	        st.error("The grobid_client command was not found on PATH.")
	        st.stop()
	    except subprocess.CalledProcessError as err:
	        st.error(f"GROBID processing failed: {err.stderr or err}")
	        st.stop()

	    # The command writes into ./outputs/, so look for the TEI file there
	    # rather than next to the uploaded PDF.
	    # NOTE(review): assumes the client names its result
	    # "<input stem>.grobid.tei.xml" - confirm against the installed version.
	    pdf_stem = os.path.splitext(os.path.basename(saved_file_path))[0]
	    output_file_path = os.path.join("./outputs", pdf_stem + ".grobid.tei.xml")
	    if not os.path.exists(output_file_path):
	        st.error(f"GROBID did not produce {output_file_path}.")
	        st.stop()

	    monkeyReader = reader.MonkeyReader(option)

	    # Show the document outline, one line per entry.
	    outline = monkeyReader.readOutline(output_file_path)
	    for pre, _fill, node in outline:
	        st.write(f"{pre}{node.name}")

	    # read paper content
	    essay = monkeyReader.readEssay(output_file_path)
	    for key, values in essay.items():
	        st.write(f"{key}: {', '.join(values)}")