Spaces:
Sleeping
Sleeping
add grobid processing
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
|
|
3 |
import numpy as np
|
4 |
import os
|
5 |
from grobidmonkey import reader
|
|
|
6 |
|
7 |
def save_uploaded_file(uploaded_file):
|
8 |
file_path = os.path.join("uploads", uploaded_file.name)
|
@@ -20,6 +21,16 @@ if uploaded_file is not None:
|
|
20 |
bytes_data = uploaded_file.getvalue()
|
21 |
st.write(len(bytes_data), "bytes")
|
22 |
saved_file_path = save_uploaded_file(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
monkeyReader = reader.MonkeyReader('x2d')
|
24 |
outline = monkeyReader.readOutline(saved_file_path)
|
25 |
|
|
|
3 |
import numpy as np
|
4 |
import os
|
5 |
from grobidmonkey import reader
|
6 |
+
from grobid_client.grobid_client import GrobidClient
|
7 |
|
8 |
def save_uploaded_file(uploaded_file):
|
9 |
file_path = os.path.join("uploads", uploaded_file.name)
|
|
|
21 |
bytes_data = uploaded_file.getvalue()
|
22 |
st.write(len(bytes_data), "bytes")
|
23 |
saved_file_path = save_uploaded_file(uploaded_file)
|
24 |
+
|
25 |
+
os.makedirs("grobidoutputs", exist_ok=True)
|
26 |
+
client = GrobidClient(config_path="./config.json")
|
27 |
+
client.process("processFulltextDocument",
|
28 |
+
"./uploads/",
|
29 |
+
output="./grobidoutputs/",
|
30 |
+
n=20)
|
31 |
+
|
32 |
+
# Expected location of the TEI XML for this upload: the upload's base name plus a TEI suffix.
# The suffix needs its leading dot; without it "paper.pdf" would map to "papertei.xml".
# NOTE(review): depending on the installed grobid_client version the written suffix may be
# ".grobid.tei.xml" rather than ".tei.xml" — confirm against the files in ./grobidoutputs/.
output_file_path = os.path.join("grobidoutputs", os.path.splitext(uploaded_file.name)[0] + ".tei.xml")
|
33 |
+
|
34 |
monkeyReader = reader.MonkeyReader('x2d')
|
35 |
outline = monkeyReader.readOutline(saved_file_path)
|
36 |
|