File size: 1,780 Bytes
ea5c59c
 
 
8d4620d
28c51ee
9ec3b13
395fe09
ea5c59c
8d4620d
28c51ee
 
8d4620d
 
 
ea5c59c
8d4620d
ea5c59c
1ed0b9b
ea5c59c
 
 
 
 
8d4620d
395fe09
5113d80
e5ef7f9
395fe09
28c51ee
619f943
395fe09
5113d80
 
675fdb7
0726521
28c51ee
 
 
 
 
 
 
 
d7b55a9
cfcf528
5e43e62
2e65567
 
fda22ce
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import streamlit as st
import pandas as pd
import numpy as np
import os
import glob
from grobidmonkey import reader
from grobid_client.grobid_client import GrobidClient

def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``./uploads`` and return where it was saved.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the file's bytes (this is how
            the object is used below; e.g. a Streamlit upload).

    Returns:
        The path of the written file, as a string.
    """
    upload_dir = "./uploads"
    # The name comes from the client. Keep only its final path component so a
    # value such as "../../etc/x" cannot place the file outside upload_dir.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(upload_dir, safe_name)
    os.makedirs(upload_dir, exist_ok=True)  # Create 'uploads' directory if it doesn't exist
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path  # Return the file path as a string

# Streamlit page: upload a paper, run it through GROBID, then display the
# outline that grobidmonkey extracts from the resulting TEI XML.
st.title('Paper2Slides')

st.subheader('Upload paper in pdf format')
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    st.write(uploaded_file.name)
    bytes_data = uploaded_file.getvalue()
    st.write(len(bytes_data), "bytes")
    saved_file_path = save_uploaded_file(uploaded_file)

    grobid_output_dir = "./grobidoutputs"
    os.makedirs(grobid_output_dir, exist_ok=True)
    client = GrobidClient(config_path="./grobidconfig.json")
    # NOTE(review): this hands the whole uploads directory to GROBID, so files
    # saved by earlier uploads are submitted again on every run.
    client.process("processFulltextDocument",
                   "./uploads/",
                   output="./grobidoutputs/",
                   n=20)

    # Look for the TEI result in the directory GROBID was told to write to.
    # The stem is taken from the saved path (the name actually on disk) and is
    # glob-escaped so characters like '[' in a file name are matched literally.
    # NOTE(review): the ".grobid.tei.xml" suffix is what the GROBID Python
    # client is expected to produce - confirm against the installed version.
    paper_stem = os.path.splitext(os.path.basename(saved_file_path))[0]
    pattern = glob.escape(paper_stem) + ".grobid.tei.xml"
    matching_files = glob.glob(
        os.path.join(grobid_output_dir, "**", pattern), recursive=True
    )

    if not matching_files:
        st.write("No matching file found.")
        # Nothing to parse; end this script run instead of indexing an empty list.
        st.stop()

    st.write("Found matching file(s):")
    for file in matching_files:
        st.write(file)

    output_file_path = matching_files[0]

    monkeyReader = reader.MonkeyReader('x2d')
    outline = monkeyReader.readOutline(output_file_path)

    # Each outline item unpacks to (prefix, fill, node); only prefix and the
    # node's name are displayed.
    for pre, fill, node in outline:
        st.write(f"{pre}{node.name}")

    # read paper content
    # NOTE(review): readOutline is given the TEI file while readEssay is given
    # the uploaded file's path - verify which input readEssay expects.
    essay = monkeyReader.readEssay(saved_file_path)