Spaces:

OrganizedProgrammers
/

arXiv

Sleeping

App Files Files Community

Omar ID EL MOUMEN committed on Mar 26

Commit

9513d18

1 Parent(s): 5e9984e

Change CRUD: GET -> POST

Browse files

Files changed (1) hide show

app.py +19 -13

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
 import xml.etree.ElementTree as xmlparser
 import requests
 import sys
 import random
 import fitz
@@ -40,12 +41,17 @@ app.add_middleware(
 async def root():
     return {"message": "API started successfully"}
-@app.get("/search/{keyword}/{limit}")
-async def get_articles(keyword: str, limit: int):
     XML_NAMESPACE = "{http://www.w3.org/2005/Atom}"
     content = {}
     try:
-        arxiv_search_result = requests.get(f"http://export.arxiv.org/api/query?search_query=all:{keyword}&max_results={limit}", verify=False)
         response = xmlparser.fromstring(arxiv_search_result.text)
         publications = response.findall(f"{XML_NAMESPACE}entry")
         for pub in publications:
@@ -64,10 +70,10 @@ async def get_articles(keyword: str, limit: int):
     except Exception as e:
         print(f"Error while downloading data : {str(e)}")
         return {"error": True, "message": str(e)}
-@app.get("/extract/{id_doc}")
-async def extract_text_pdf(id_doc: str):
-    pdf_req = requests.get(f"http://arxiv.org/pdf/{id_doc}", verify=False)
     if pdf_req.status_code == 200:
         pdf_data = BytesIO(pdf_req.content)
         doc = fitz.open(stream=pdf_data, filetype="pdf")
@@ -100,14 +106,14 @@ async def extract_text_pdf(id_doc: str):
             for title in titles:
                 if title[0] == 1:
                     main_titles.append(title[1])
-        return {"pub_id": id_doc, "titles": main_titles, "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": id_doc, "titles": "No titles found !", "text": postprocess_text, "error": False}
     else:
-        print("ID: " + id_doc)
-        print("URL: " + f"http://arxiv.org/pdf/{id_doc}")
         print("Status code: " + str(pdf_req.status_code))
         return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
-@app.get("/extract/random/{keyword}/{limit}")
-async def extract_random_pdf(keyword: str, limit: int):
-    pubs = await get_articles(keyword, limit)
     return await extract_text_pdf(random.choice(list(pubs["message"].keys())))

 from contextlib import asynccontextmanager
 import xml.etree.ElementTree as xmlparser
 import requests
+from pydantic import BaseModel
 import sys
 import random
 import fitz
 async def root():
     return {"message": "API started successfully"}
+class Query(BaseModel):
+    keyword: str
+    limit: int
+# Put all GET into POST
+@app.post("/search")
+async def get_articles(query: Query):
     XML_NAMESPACE = "{http://www.w3.org/2005/Atom}"
     content = {}
     try:
+        arxiv_search_result = requests.get(f"http://export.arxiv.org/api/query?search_query=all:{query.keyword}&max_results={query.limit}", verify=False)
         response = xmlparser.fromstring(arxiv_search_result.text)
         publications = response.findall(f"{XML_NAMESPACE}entry")
         for pub in publications:
     except Exception as e:
         print(f"Error while downloading data : {str(e)}")
         return {"error": True, "message": str(e)}
+@app.post("/extract")
+async def extract_text_pdf(doc_id: str):
+    pdf_req = requests.get(f"http://arxiv.org/pdf/{doc_id}", verify=False)
     if pdf_req.status_code == 200:
         pdf_data = BytesIO(pdf_req.content)
         doc = fitz.open(stream=pdf_data, filetype="pdf")
             for title in titles:
                 if title[0] == 1:
                     main_titles.append(title[1])
+        return {"pub_id": doc_id, "titles": main_titles, "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
     else:
+        print("ID: " + doc_id)
+        print("URL: " + f"http://arxiv.org/pdf/{doc_id}")
         print("Status code: " + str(pdf_req.status_code))
         return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
+@app.post("/extract/random")
+async def extract_random_pdf(query: Query):
+    pubs = await get_articles(query)
     return await extract_text_pdf(random.choice(list(pubs["message"].keys())))