File size: 1,681 Bytes
ea5c59c
 
 
8d4620d
28c51ee
9ec3b13
395fe09
ea5c59c
8d4620d
28c51ee
 
8d4620d
 
 
ea5c59c
8d4620d
ea5c59c
1ed0b9b
ea5c59c
 
 
 
 
8d4620d
395fe09
 
e5ef7f9
395fe09
28c51ee
395fe09
28c51ee
 
 
 
 
 
 
 
 
 
d7b55a9
28c51ee
5e43e62
2e65567
 
fda22ce
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import streamlit as st
import pandas as pd
import numpy as np
import os
import glob
from grobidmonkey import reader
from grobid_client.grobid_client import GrobidClient

def save_uploaded_file(uploaded_file):
    """Persist an uploaded file under ``./uploads`` and return its path.

    Args:
        uploaded_file: Object exposing a ``name`` attribute (the file name
            supplied by the client) and a ``getbuffer()`` method returning
            the file's bytes.

    Returns:
        The path of the written file, as a string rooted at ``./uploads``.
    """
    upload_dir = "./uploads"
    # The name comes from the client; keep only its final component so that
    # something like "../../etc/passwd" cannot escape the uploads directory.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(upload_dir, safe_name)
    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string

# Streamlit page: upload a PDF, run GROBID over the uploads directory, then
# display the outline extracted from the resulting TEI XML file.
st.title('Paper2Slides')

st.subheader('Upload paper in pdf format')
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # Echo the upload's name and size so the user can confirm the right file.
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")
    saved_file_path = save_uploaded_file(uploaded_file)

    os.makedirs("grobidoutputs", exist_ok=True)
    client = GrobidClient(config_path="./grobidconfig.json")
    # Processes every file in ./uploads/, not only the one just saved.
    client.process("processFulltextDocument",
                   "./uploads/",
                   n=20)

    # Look for the TEI XML produced for this upload anywhere below the
    # working directory. The name is derived from the file actually written
    # to disk, and escaped so characters such as '[' or '*' in a file name
    # are matched literally instead of being treated as glob wildcards.
    directory = "."
    saved_name = os.path.basename(saved_file_path)
    stem = os.path.splitext(saved_name)[0]
    # NOTE(review): the original code searched only for "<name>.tei.xml".
    # grobid_client appears to name its output after the PDF's stem
    # ("<stem>.tei.xml" or "<stem>.grobid.tei.xml" depending on version) --
    # confirm against the installed client and trim this list accordingly.
    candidate_names = (
        saved_name + ".tei.xml",
        stem + ".tei.xml",
        stem + ".grobid.tei.xml",
    )
    matching_files = []
    for candidate in candidate_names:
        pattern = glob.escape(candidate)
        for found in glob.glob(f"{directory}/**/{pattern}", recursive=True):
            if found not in matching_files:
                matching_files.append(found)

    if not matching_files:
        # Nothing to parse: report it and do not call the reader at all.
        st.write("No matching file found.")
    else:
        st.write("Found matching file(s):")
        for match in matching_files:
            st.write(match)

        # The reader is given one path, so use the first match rather than
        # the whole list returned by the search.
        output_file_path = matching_files[0]

        monkeyReader = reader.MonkeyReader('x2d')
        outline = monkeyReader.readOutline(output_file_path)

        # Each outline entry unpacks to (prefix, fill, node); print the
        # prefix followed by the node's name.
        for pre, fill, node in outline:
            st.write(f"{pre}{node.name}")

        # read paper content
        # NOTE(review): this passes the saved PDF path while readOutline gets
        # the TEI XML path -- confirm which one readEssay expects.
        essay = monkeyReader.readEssay(saved_file_path)