Spaces:

Game4all
/

SERPent2

Running

SERPent2 / serp /arxiv.py

Initial commit

d907837 16 days ago

1.57 kB

	from serp.base import SERPBackendBase, SerpResultItem
	from lxml import etree


	class ArxivSerpBackend(SERPBackendBase):
	    """SERP backend that searches arXiv through its Atom export API."""

	    @property
	    def name(self) -> str:
	        """Return the identifier of this backend (``"arxiv"``)."""
	        return "arxiv"

	    async def query(self, query, client):
	        """Search arXiv and return one result item per usable Atom entry.

	        Args:
	            query: Search request; its ``query``, ``n_results`` and
	                ``sort_by`` attributes are read.
	            client: Async HTTP client. ``await client.get(url, params=...)``
	                must yield a response exposing ``raise_for_status()`` and
	                ``content`` (presumably an httpx-style client -- confirm
	                against the caller).

	        Returns:
	            A list of ``SerpResultItem``. For each item ``href`` is the
	            entry id with ``/abs/`` rewritten to ``/pdf/``, ``body`` is the
	            entry summary, and ``id`` is the entry's Atom id. Entries
	            without an id are skipped because no link can be built for
	            them; a missing title or summary becomes an empty string.

	        Raises:
	            Whatever ``response.raise_for_status()`` raises on an HTTP
	            error, and the parser's error if the payload is not
	            well-formed XML.
	        """
	        ATOM_NAMESPACE = {'atom': 'http://www.w3.org/2005/Atom'}
	        ARXIV_API_URL = 'https://export.arxiv.org/api/query?'

	        search_params = {
	            'search_query': query.query,
	            'start': 0,
	            'max_results': query.n_results,
	            'sortBy': "submittedDate" if query.sort_by == "date" else "relevance",
	        }

	        response = await client.get(ARXIV_API_URL, params=search_params)
	        response.raise_for_status()

	        root = etree.fromstring(response.content)

	        results = []
	        for entry in root.findall('atom:entry', ATOM_NAMESPACE):
	            # findtext() returns '' for a missing element (via default) and
	            # for an element with no text, so these lookups never hit None.
	            entry_id = entry.findtext(
	                'atom:id', default='', namespaces=ATOM_NAMESPACE).strip()
	            if not entry_id:
	                # Without an id there is nothing to link to; skip the entry
	                # instead of failing the whole search.
	                continue

	            # Titles may wrap across lines with indentation; collapse every
	            # run of whitespace so the title reads as a single clean line.
	            title = ' '.join(entry.findtext(
	                'atom:title', default='', namespaces=ATOM_NAMESPACE).split())
	            summary = entry.findtext(
	                'atom:summary', default='', namespaces=ATOM_NAMESPACE).strip()

	            results.append(SerpResultItem(
	                title=title,
	                # The PDF lives at the same path as the abstract page, with
	                # '/abs/' swapped for '/pdf/'.
	                href=entry_id.replace('/abs/', '/pdf/'),
	                body=summary,
	                id=entry_id,
	            ))

	        return results

	    @property
	    def category(self) -> str:
	        """Return the result category served by this backend (``"scholar"``)."""
	        return "scholar"