Spaces:
Sleeping
Sleeping
Omar ID EL MOUMEN
committed on
Commit
·
aea4c94
1
Parent(s):
88cff0c
Add pub_id for recognition
Browse files
app.py
CHANGED
@@ -91,7 +91,7 @@ async def extract_text_pdf(id_doc: str):
|
|
91 |
postprocess_text = remove_punctuations(postprocess_text)
|
92 |
regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+$"
|
93 |
titles = re.findall(regex_titles, postprocess_text, flags=re.MULTILINE) if len(doc.get_toc()) <= 0 else doc.get_toc()
|
94 |
-
return {"
|
95 |
else:
|
96 |
print("ID: " + id_doc)
|
97 |
print("URL: " + f"http://arxiv.org/pdf/{id_doc}")
|
|
|
91 |
postprocess_text = remove_punctuations(postprocess_text)
|
92 |
regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+$"
|
93 |
titles = re.findall(regex_titles, postprocess_text, flags=re.MULTILINE) if len(doc.get_toc()) <= 0 else doc.get_toc()
|
94 |
+
return {"message": titles, "pub_id": id_doc, "error": False}
|
95 |
else:
|
96 |
print("ID: " + id_doc)
|
97 |
print("URL: " + f"http://arxiv.org/pdf/{id_doc}")
|