Omar ID EL MOUMEN committed on
Commit
9513d18
·
1 Parent(s): 5e9984e

Change CRUD: GET -> POST

Browse files
Files changed (1) hide show
  1. app.py +19 -13
app.py CHANGED
@@ -3,6 +3,7 @@ from fastapi.middleware.cors import CORSMiddleware
3
  from contextlib import asynccontextmanager
4
  import xml.etree.ElementTree as xmlparser
5
  import requests
 
6
  import sys
7
  import random
8
  import fitz
@@ -40,12 +41,17 @@ app.add_middleware(
40
  async def root():
41
  return {"message": "API started successfully"}
42
 
43
- @app.get("/search/{keyword}/{limit}")
44
- async def get_articles(keyword: str, limit: int):
 
 
 
 
 
45
  XML_NAMESPACE = "{http://www.w3.org/2005/Atom}"
46
  content = {}
47
  try:
48
- arxiv_search_result = requests.get(f"http://export.arxiv.org/api/query?search_query=all:{keyword}&max_results={limit}", verify=False)
49
  response = xmlparser.fromstring(arxiv_search_result.text)
50
  publications = response.findall(f"{XML_NAMESPACE}entry")
51
  for pub in publications:
@@ -64,10 +70,10 @@ async def get_articles(keyword: str, limit: int):
64
  except Exception as e:
65
  print(f"Error while downloading data : {str(e)}")
66
  return {"error": True, "message": str(e)}
67
-
68
- @app.get("/extract/{id_doc}")
69
- async def extract_text_pdf(id_doc: str):
70
- pdf_req = requests.get(f"http://arxiv.org/pdf/{id_doc}", verify=False)
71
  if pdf_req.status_code == 200:
72
  pdf_data = BytesIO(pdf_req.content)
73
  doc = fitz.open(stream=pdf_data, filetype="pdf")
@@ -100,14 +106,14 @@ async def extract_text_pdf(id_doc: str):
100
  for title in titles:
101
  if title[0] == 1:
102
  main_titles.append(title[1])
103
- return {"pub_id": id_doc, "titles": main_titles, "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": id_doc, "titles": "No titles found !", "text": postprocess_text, "error": False}
104
  else:
105
- print("ID: " + id_doc)
106
- print("URL: " + f"http://arxiv.org/pdf/{id_doc}")
107
  print("Status code: " + str(pdf_req.status_code))
108
  return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
109
 
110
- @app.get("/extract/random/{keyword}/{limit}")
111
- async def extract_random_pdf(keyword: str, limit: int):
112
- pubs = await get_articles(keyword, limit)
113
  return await extract_text_pdf(random.choice(list(pubs["message"].keys())))
 
3
  from contextlib import asynccontextmanager
4
  import xml.etree.ElementTree as xmlparser
5
  import requests
6
+ from pydantic import BaseModel
7
  import sys
8
  import random
9
  import fitz
 
41
  async def root():
42
  return {"message": "API started successfully"}
43
 
44
+ class Query(BaseModel):
45
+ keyword: str
46
+ limit: int
47
+
48
+ # Put all GET into POST
49
+ @app.post("/search")
50
+ async def get_articles(query: Query):
51
  XML_NAMESPACE = "{http://www.w3.org/2005/Atom}"
52
  content = {}
53
  try:
54
+ arxiv_search_result = requests.get(f"http://export.arxiv.org/api/query?search_query=all:{query.keyword}&max_results={query.limit}", verify=False)
55
  response = xmlparser.fromstring(arxiv_search_result.text)
56
  publications = response.findall(f"{XML_NAMESPACE}entry")
57
  for pub in publications:
 
70
  except Exception as e:
71
  print(f"Error while downloading data : {str(e)}")
72
  return {"error": True, "message": str(e)}
73
+
74
+ @app.post("/extract")
75
+ async def extract_text_pdf(doc_id: str):
76
+ pdf_req = requests.get(f"http://arxiv.org/pdf/{doc_id}", verify=False)
77
  if pdf_req.status_code == 200:
78
  pdf_data = BytesIO(pdf_req.content)
79
  doc = fitz.open(stream=pdf_data, filetype="pdf")
 
106
  for title in titles:
107
  if title[0] == 1:
108
  main_titles.append(title[1])
109
+ return {"pub_id": doc_id, "titles": main_titles, "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
110
  else:
111
+ print("ID: " + doc_id)
112
+ print("URL: " + f"http://arxiv.org/pdf/{doc_id}")
113
  print("Status code: " + str(pdf_req.status_code))
114
  return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
115
 
116
+ @app.post("/extract/random")
117
+ async def extract_random_pdf(query: Query):
118
+ pubs = await get_articles(query)
119
  return await extract_text_pdf(random.choice(list(pubs["message"].keys())))