# paper-hero / server.py
# Origin: Hugging Face Space by Spico, commit 0841c28 (4.64 kB)
# Commit message: add `build_paper_list` and `build_and_search` methods to
# help build demo (direct API)
import logging
import os
import uuid
import tempfile
import pathlib
import uvicorn
from fastapi import FastAPI
from fastapi.responses import FileResponse
from pydantic import BaseModel
from src.interfaces.aclanthology import AclanthologyPaperList
from src.interfaces.arxiv import ArxivPaperList
from src.interfaces.dblp import DblpPaperList
from src.utils import dump_json, load_json
class SearchQuery(BaseModel):
    """Request body for the ``POST /api/`` search endpoint."""

    # Backend to query; must be one of "aclanthology", "arxiv", "dblp"
    # (validated in the route handler, not here).
    method: str
    # Backend-specific search filters; schema depends on `method` —
    # it is forwarded verbatim to the paper list's `search()`.
    query: dict
    # Cap on the number of papers returned (also forwarded to the
    # arxiv/dblp builders as their fetch limit).
    max_results: int = 1000
    # If True, matched papers are embedded in the JSON response;
    # otherwise a temp-file url + download token is returned instead.
    return_content: bool = False
# When True, the per-request temp cache file (arxiv/dblp only) is deleted
# after each search; the persistent ACL cache is never removed.
REMOVE_CACHE = False
# Persistent on-disk cache of the ACL anthology dump, shared by all requests.
ACL_CACHE_FILEPATH = "./cache/aclanthology.json"
app = FastAPI()
# NOTE(review): uvicorn's own loggers are "uvicorn", "uvicorn.error" and
# "uvicorn.access"; "uvicorn.default" is a child of "uvicorn" and relies on
# propagation to reach uvicorn's handlers — verify records actually appear.
logger = logging.getLogger("uvicorn.default")
def get_uid():
    """Return a random download token: a canonical UUID4 string.

    Equivalent to taking the last colon-separated field of the UUID's
    URN form ("urn:uuid:<hex-with-hyphens>").
    """
    return str(uuid.uuid4())
@app.get("/")
async def api():
    """Serve the single-page demo frontend at the site root."""
    page = FileResponse("./index.html", media_type="text/html")
    return page
def _make_temp_cache(prefix: str, suffix: str) -> str:
    """Create a named temp file and return its path.

    Bug fix: `tempfile.mkstemp` returns an OPEN file descriptor which the
    original code discarded, leaking one fd per request. Close it here.
    """
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix, text=True)
    os.close(fd)
    return path


@app.post("/api/")
async def api(q: SearchQuery):  # noqa: F811
    """Search papers via one of the supported backends.

    Returns a dict with:
      ok       -- whether the method was recognized and the search ran
      cand     -- number of candidate papers in the backend's list
      paper    -- number of papers matched (after `max_results` cap)
      url/token -- temp-file path + token for /download/ (when
                   `return_content` is False and there are results)
      content  -- the matched papers (when `return_content` is True)
      msg      -- human-readable status
    """
    ret = {
        "ok": False,
        "cand": 0,
        "paper": 0,
        "url": "",
        "token": "",
        "msg": "",
        "content": [],
    }
    if q.method not in ["aclanthology", "arxiv", "dblp"]:
        ret["msg"] = f"{q.method} method not supported"
        return ret

    cache_filepath = ""
    if q.method == "aclanthology":
        # Persistent shared cache — never created per-request, never removed.
        cache_filepath = ACL_CACHE_FILEPATH
        plist = AclanthologyPaperList.build_paper_list(ACL_CACHE_FILEPATH)
    elif q.method == "arxiv":
        cache_filepath = _make_temp_cache("arxiv.cache.", ".xml")
        plist = ArxivPaperList.build_paper_list(
            cache_filepath, q.query, max_results=q.max_results
        )
    else:  # q.method == "dblp"
        cache_filepath = _make_temp_cache("dblp.cache.", ".json")
        plist = DblpPaperList.build_paper_list(
            cache_filepath, q.query, max_results=q.max_results
        )

    # Common tail, previously triplicated across the three branches.
    papers = plist.search(q.query)[: q.max_results]
    ret["ok"] = True
    ret["msg"] = f"#candidates: {len(plist.papers)}"
    ret["cand"] = len(plist.papers)

    if papers:
        papers = [p.as_dict() for p in papers]
        ret["paper"] = len(papers)
        if q.return_content:
            ret["content"] = papers
        else:
            # Stash results in a temp file; client fetches via /download/
            # with the (url, token) pair.
            result_filepath = _make_temp_cache(f"{q.method}.search.", ".json")
            ret["url"] = result_filepath
            ret["token"] = get_uid()
            cache = {
                "token": ret["token"],
                "url": ret["url"],
                "content": papers,
            }
            dump_json(cache, result_filepath)
    if REMOVE_CACHE and q.method != "aclanthology":
        os.remove(cache_filepath)
    logger.info(
        (
            f"m: {q.method}, q: {q.query}, cands: {len(plist.papers)},"
            f" max: {q.max_results}, #papers: {len(papers)}, cache: {cache_filepath}"
            f" ret.url: {ret.get('url', '')}"
        )
    )
    return ret
@app.get("/download/")
async def download(u: str, t: str):  # noqa: F811
    """Serve a previously-stashed search result file.

    u -- path of the temp result file produced by /api/
    t -- download token that must match the one stored inside the file

    NOTE(security): `u` is a caller-controlled filesystem path (path
    traversal); exposure is limited because the file must be valid JSON
    containing a matching "token", but consider restricting `u` to the
    temp directory.
    """
    logger.info(f"{u=}, {t=}")
    results_filepath = pathlib.Path(u)
    token = t
    if results_filepath.exists():
        data = load_json(results_filepath)
        if data["token"] == token:
            filename = results_filepath.name
            # Filenames look like "<method>.search.<random>.json"; only the
            # leading method component is needed. (The original 4-way tuple
            # unpack raised ValueError on any unexpected dot count.)
            prefix = filename.split(".", 1)[0]
            # Bug fix: close the fd returned by mkstemp (was leaked).
            fd, download_filepath = tempfile.mkstemp(
                prefix=f"{prefix}.download.", suffix=".json"
            )
            os.close(fd)
            dump_json(data["content"], download_filepath, indent=2)
            logger.info(f"Download: from {u} to {download_filepath}")
            return FileResponse(download_filepath, filename=f"{prefix}.json")
    return {"ok": False, "msg": "file not exist or token mismatch"}
if __name__ == "__main__":
    # Prefix a timestamp onto uvicorn's stock access/default log formats.
    log_config = uvicorn.config.LOGGING_CONFIG
    for formatter_name in ("access", "default"):
        original_fmt = log_config["formatters"][formatter_name]["fmt"]
        log_config["formatters"][formatter_name]["fmt"] = (
            "%(asctime)s | " + original_fmt
        )
    uvicorn.run(
        "server:app",
        host="0.0.0.0",
        port=7860,
        log_level="debug",
        log_config=log_config,
        reload=False,
    )