Omar ID EL MOUMEN committed on
Commit
aea4c94
·
1 Parent(s): 88cff0c

Add pub_id for recognition

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -91,7 +91,7 @@ async def extract_text_pdf(id_doc: str):
91
  postprocess_text = remove_punctuations(postprocess_text)
92
  regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+$"
93
  titles = re.findall(regex_titles, postprocess_text, flags=re.MULTILINE) if len(doc.get_toc()) <= 0 else doc.get_toc()
94
- return {"error": False, "message": titles}
95
  else:
96
  print("ID: " + id_doc)
97
  print("URL: " + f"http://arxiv.org/pdf/{id_doc}")
 
91
  postprocess_text = remove_punctuations(postprocess_text)
92
  regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+$"
93
  titles = re.findall(regex_titles, postprocess_text, flags=re.MULTILINE) if len(doc.get_toc()) <= 0 else doc.get_toc()
94
+ return {"message": titles, "pub_id": id_doc, "error": False}
95
  else:
96
  print("ID: " + id_doc)
97
  print("URL: " + f"http://arxiv.org/pdf/{id_doc}")