com3dian committed on
Commit
395fe09
·
1 Parent(s): 7579ae1

add grobid processing

Browse files
Files changed (1) hide show
  1. app.py +11 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import numpy as np
4
  import os
5
  from grobidmonkey import reader
 
6
 
7
  def save_uploaded_file(uploaded_file):
8
  file_path = os.path.join("uploads", uploaded_file.name)
@@ -20,6 +21,16 @@ if uploaded_file is not None:
20
  bytes_data = uploaded_file.getvalue()
21
  st.write(len(bytes_data), "bytes")
22
  saved_file_path = save_uploaded_file(uploaded_file)
 
 
 
 
 
 
 
 
 
 
23
  monkeyReader = reader.MonkeyReader('x2d')
24
  outline = monkeyReader.readOutline(saved_file_path)
25
 
 
3
  import numpy as np
4
  import os
5
  from grobidmonkey import reader
6
+ from grobid_client.grobid_client import GrobidClient
7
 
8
  def save_uploaded_file(uploaded_file):
9
  file_path = os.path.join("uploads", uploaded_file.name)
 
21
  bytes_data = uploaded_file.getvalue()
22
  st.write(len(bytes_data), "bytes")
23
  saved_file_path = save_uploaded_file(uploaded_file)
24
+
25
+ os.makedirs("grobidoutputs", exist_ok=True)
26
+ client = GrobidClient(config_path="./config.json")
27
+ client.process("processFulltextDocument",
28
+ "./uploads/",
29
+ output="./grobidoutputs/",
30
+ n=20)
31
+
32
+ output_file_path = os.path.join("grobidoutputs", os.path.splitext(uploaded_file.name)[0] + "tei.xml")
33
+
34
  monkeyReader = reader.MonkeyReader('x2d')
35
  outline = monkeyReader.readOutline(saved_file_path)
36