"""MCP server exposing arXiv search, web PDF search and PDF text extraction tools."""

import logging
from typing import Any, Literal

import httpx
from mcp.server.fastmcp import FastMCP

# The server runs over stdio, so stdout carries the protocol messages.
# Diagnostics must go through logging instead of print().
logger = logging.getLogger(__name__)

# Initialize FastMCP server
mcp = FastMCP("arxiv-omar")

# Constants
CUSTOM_ARXIV_API_BASE = "https://om4r932-arxiv.hf.space"
DDG_API_BASE = "https://ychkhan-ptt-endpoints.hf.space"


# Helpers
async def make_request(url: str, data: dict[str, Any] | None = None) -> dict[str, Any] | None:
    """POST ``data`` as JSON to ``url`` and return the decoded JSON response.

    Args:
        url: Endpoint to call.
        data: JSON-serialisable request body. When ``None`` no request is sent.

    Returns:
        The decoded JSON body, or ``None`` when ``data`` is ``None`` or when
        the request, the status check or the JSON decoding fails.
    """
    if data is None:
        return None

    headers = {"Accept": "application/json"}
    # NOTE(review): verify=False disables TLS certificate verification.
    # Kept as in the original; confirm this is intentional.
    async with httpx.AsyncClient(verify=False) as client:
        try:
            response = await client.post(url, headers=headers, json=data)
            logger.debug("POST %s -> %s", url, response.status_code)
            response.raise_for_status()
            return response.json()
        except Exception:
            # Best-effort helper: callers treat None as "request failed",
            # but the cause is logged rather than silently discarded.
            logger.warning("Request to %s failed", url, exc_info=True)
            return None


def format_search(pub_id: str, content: dict[str, Any]) -> str:
    """Render one publication entry as a human-readable text block.

    Args:
        pub_id: Identifier of the publication.
        content: Mapping that must contain the keys ``title``, ``authors``,
            ``date``, ``abstract`` and ``pdf``.

    Raises:
        KeyError: If one of the required keys is missing from ``content``.
    """
    return f"""
arXiv publication ID : {pub_id}
Title : {content["title"]}
Authors : {content["authors"]}
Release Date : {content["date"]}
Abstract : {content["abstract"]}
PDF link : {content["pdf"]}
"""


def format_extract(message: dict[str, Any]) -> str:
    """Render an extracted PDF (title and text) as a text block.

    Missing ``title`` or ``text`` keys are replaced by placeholder strings.
    """
    return f"""
Title of PDF : {message.get("title", "No title has been found")}
Text : {message.get("text", "No text !")}
"""


def format_result_search(page: dict[str, Any]) -> str:
    """Render one web search result (title, description, URL) as a text block.

    Missing ``title`` or ``body`` keys are replaced by placeholder strings;
    a missing ``url`` is rendered as ``None``.
    """
    return f"""
Title : {page.get("title", "No titles found !")}
Little description : {page.get("body", "No description")}
PDF url : {page.get("url", None)}
"""


# Tools
@mcp.tool()
async def get_publications(keyword: str, limit: int = 15) -> str:
    """
    Get arXiv publications based on keywords and limit of documents

    Args:
        keyword: Keywords separated by spaces
        limit: Maximum number of publications returned (by default, 15)
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/search"
    data = await make_request(url, data={"keyword": keyword, "limit": limit})

    # Check for a failed request first: make_request returns None on failure,
    # and subscripting None would raise instead of reporting the problem.
    if not data:
        return "Unable to fetch publications"

    if data.get("error"):
        return str(data.get("message", "Unable to fetch publications"))

    publications = data.get("message")
    if not publications:
        return "No publications found"

    formatted = [format_search(pub_id, content) for pub_id, content in publications.items()]
    return "\n--\n".join(formatted)


@mcp.tool()
async def web_search(query: str) -> str:
    """
    Search the Web (thanks to DuckDuckGo) for all PDF files based on the keywords

    Args:
        query: Keywords to search documents on the Web
    """
    url = f"{DDG_API_BASE}/search"
    data = await make_request(url, data={"query": query})

    if not data:
        return "Unable to fetch results"

    results = data.get("results")
    if not results:
        return "No results found"

    return "\n--\n".join(format_result_search(result) for result in results)


@mcp.tool()
async def get_pdf_text(pdf_url: str, limit_page: int = -1) -> str:
    """
    Extract the text from the URL pointing to a PDF file

    Args:
        pdf_url: URL to a PDF document
        limit_page: How many pages the user wants to extract the content (default: -1 for all pages)
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/extract_pdf/url"

    payload: dict[str, Any] = {"url": pdf_url}
    # -1 means "all pages": the page limit is simply left out of the request.
    if limit_page != -1:
        payload["page_num"] = limit_page

    data = await make_request(url, data=payload)

    # Check for a failed request first: make_request returns None on failure,
    # and subscripting None would raise instead of reporting the problem.
    if not data:
        return "Unable to extract PDF text"

    if data.get("error"):
        return str(data.get("message", "Unable to extract PDF text"))

    message = data.get("message")
    if not message:
        return "No text can be extracted from this PDF"

    return format_extract(message)


if __name__ == "__main__":
    mcp.run(transport="stdio")