Spaces:
Running
on
Zero
Running
on
Zero
| import random | |
| from datetime import datetime, timedelta, timezone | |
| from typing import Optional, Union | |
| import arxiv | |
| import requests | |
# Module-wide arXiv API client, created once with the library's default
# settings and shared by retrieve_arxiv_paper() and retrieve_arxiv_papers().
arxiv_client = arxiv.Client()
# arXiv subject taxonomy used by this module: top-level group name ->
# {category code -> human-readable category name}.
ARXIV_CATEGORIES: dict[str, dict[str, str]] = {
    "Computer Science": {
        "cs.AI": "Artificial Intelligence",
        "cs.AR": "Hardware Architecture",
        "cs.CC": "Computational Complexity",
        "cs.CE": "Computational Engineering",
        "cs.CG": "Computational Geometry",
        "cs.CL": "Computation and Language",
        "cs.CR": "Cryptography and Security",
        "cs.CV": "Computer Vision and Pattern Recognition",
        "cs.CY": "Computers and Society",
        "cs.DB": "Databases",
        "cs.DC": "Distributed Computing",
        "cs.DL": "Digital Libraries",
        "cs.DM": "Discrete Mathematics",
        "cs.DS": "Data Structures and Algorithms",
        "cs.ET": "Emerging Technologies",
        "cs.FL": "Formal Languages and Automata Theory",
        "cs.GL": "General Literature",
        "cs.GR": "Graphics",
        "cs.GT": "Computer Science and Game Theory",
        "cs.HC": "Human-Computer Interaction",
        "cs.IR": "Information Retrieval",
        "cs.IT": "Information Theory",
        "cs.LG": "Machine Learning",
        "cs.LO": "Logic in Computer Science",
        "cs.MA": "Multiagent Systems",
        "cs.MM": "Multimedia",
        "cs.MS": "Mathematical Software",
        "cs.NA": "Numerical Analysis",
        "cs.NE": "Neural and Evolutionary Computing",
        "cs.NI": "Networking and Internet Architecture",
        "cs.OH": "Other Computer Science",
        "cs.OS": "Operating Systems",
        "cs.PF": "Performance",
        "cs.PL": "Programming Languages",
        "cs.RO": "Robotics",
        "cs.SC": "Symbolic Computation",
        "cs.SD": "Sound",
        "cs.SE": "Software Engineering",
        "cs.SI": "Social and Information Networks",
        "cs.SY": "Systems and Control",
    },
    "Physics": {
        "astro-ph.CO": "Cosmology and Nongalactic Astrophysics",
        "astro-ph.EP": "Earth and Planetary Astrophysics",
        "astro-ph.GA": "Astrophysics of Galaxies",
        "astro-ph.HE": "High Energy Astrophysical Phenomena",
        "astro-ph.IM": "Instrumentation and Methods for Astrophysics",
        "astro-ph.SR": "Solar and Stellar Astrophysics",
        "cond-mat.dis-nn": "Disordered Systems and Neural Networks",
        "cond-mat.mes-hall": "Mesoscale and Nanoscale Physics",
        "cond-mat.mtrl-sci": "Materials Science",
        "cond-mat.other": "Other Condensed Matter",
        "cond-mat.quant-gas": "Quantum Gases",
        "cond-mat.soft": "Soft Condensed Matter",
        "cond-mat.stat-mech": "Statistical Mechanics",
        "cond-mat.str-el": "Strongly Correlated Electrons",
        "cond-mat.supr-con": "Superconductivity",
        "gr-qc": "General Relativity and Quantum Cosmology",
        "hep-ex": "High Energy Physics - Experiment",
        "hep-lat": "High Energy Physics - Lattice",
        "hep-ph": "High Energy Physics - Phenomenology",
        "hep-th": "High Energy Physics - Theory",
        "math-ph": "Mathematical Physics",
        "nlin.AO": "Adaptation and Self-Organizing Systems",
        "nlin.CD": "Chaotic Dynamics",
        "nlin.CG": "Cellular Automata and Lattice Gases",
        "nlin.PS": "Pattern Formation and Solitons",
        "nlin.SI": "Exactly Solvable and Integrable Systems",
        "nucl-ex": "Nuclear Experiment",
        "nucl-th": "Nuclear Theory",
        "physics.acc-ph": "Accelerator Physics",
        "physics.ao-ph": "Atmospheric and Oceanic Physics",
        "physics.app-ph": "Applied Physics",
        "physics.atm-clus": "Atomic and Molecular Clusters",
        "physics.atom-ph": "Atomic Physics",
        "physics.bio-ph": "Biological Physics",
        "physics.chem-ph": "Chemical Physics",
        "physics.class-ph": "Classical Physics",
        "physics.comp-ph": "Computational Physics",
        "physics.data-an": "Data Analysis, Statistics and Probability",
        "physics.ed-ph": "Physics Education",
        "physics.flu-dyn": "Fluid Dynamics",
        "physics.gen-ph": "General Physics",
        "physics.geo-ph": "Geophysics",
        "physics.hist-ph": "History and Philosophy of Physics",
        "physics.ins-det": "Instrumentation and Detectors",
        "physics.med-ph": "Medical Physics",
        "physics.optics": "Optics",
        "physics.plasm-ph": "Plasma Physics",
        "physics.pop-ph": "Popular Physics",
        "physics.soc-ph": "Physics and Society",
        "physics.space-ph": "Space Physics",
        "quant-ph": "Quantum Physics",
    },
    "Mathematics": {
        "math.AC": "Commutative Algebra",
        "math.AG": "Algebraic Geometry",
        "math.AP": "Analysis of PDEs",
        "math.AT": "Algebraic Topology",
        "math.CA": "Classical Analysis and ODEs",
        "math.CO": "Combinatorics",
        "math.CT": "Category Theory",
        "math.CV": "Complex Variables",
        "math.DG": "Differential Geometry",
        "math.DS": "Dynamical Systems",
        "math.FA": "Functional Analysis",
        "math.GM": "General Mathematics",
        "math.GN": "General Topology",
        "math.GR": "Group Theory",
        "math.GT": "Geometric Topology",
        "math.HO": "History and Overview",
        "math.IT": "Information Theory",
        "math.KT": "K-Theory and Homology",
        "math.LO": "Logic",
        "math.MG": "Metric Geometry",
        "math.MP": "Mathematical Physics",
        "math.NA": "Numerical Analysis",
        "math.NT": "Number Theory",
        "math.OA": "Operator Algebras",
        "math.OC": "Optimization and Control",
        "math.PR": "Probability",
        "math.QA": "Quantum Algebra",
        "math.RA": "Rings and Algebras",
        "math.RT": "Representation Theory",
        "math.SG": "Symplectic Geometry",
        "math.SP": "Spectral Theory",
        "math.ST": "Statistics Theory",
    },
    "Quantitative Biology": {
        "q-bio.BM": "Biomolecules",
        "q-bio.CB": "Cell Behavior",
        "q-bio.GN": "Genomics",
        "q-bio.MN": "Molecular Networks",
        "q-bio.NC": "Neurons and Cognition",
        "q-bio.OT": "Other Quantitative Biology",
        "q-bio.PE": "Populations and Evolution",
        "q-bio.QM": "Quantitative Methods",
        "q-bio.SC": "Subcellular Processes",
        "q-bio.TO": "Tissues and Organs",
    },
    "Quantitative Finance": {
        "q-fin.CP": "Computational Finance",
        "q-fin.EC": "Economics",
        "q-fin.GN": "General Finance",
        "q-fin.MF": "Mathematical Finance",
        "q-fin.PM": "Portfolio Management",
        "q-fin.PR": "Pricing of Securities",
        "q-fin.RM": "Risk Management",
        # The arXiv taxonomy names q-fin.ST "Statistical Finance".
        "q-fin.ST": "Statistical Finance",
        "q-fin.TR": "Trading and Market Microstructure",
    },
    "Statistics": {
        "stat.AP": "Applications",
        "stat.CO": "Computation",
        "stat.ME": "Methodology",
        "stat.ML": "Machine Learning",
        "stat.OT": "Other Statistics",
        "stat.TH": "Theory",
    },
    "Economics": {
        "econ.EM": "Econometrics",
        "econ.GN": "General Economics",
        "econ.TH": "Economic Theory",
    },
    "Electrical Engineering and Systems Science": {
        "eess.AS": "Audio and Speech Processing",
        "eess.IV": "Image and Video Processing",
        "eess.SP": "Signal Processing",
        "eess.SY": "Systems and Control",
    },
}
# Single-level lookup: category code -> "<group>: <name> (<code>)" label.
ARXIV_CATEGORIES_FLAT: dict[str, str] = {
    code: f"{group}: {label} ({code})"
    for group, members in ARXIV_CATEGORIES.items()
    for code, label in members.items()
}
def clean_doi(doi: str) -> str:
    """Normalize a user-supplied paper reference to a bare arXiv identifier.

    Accepted inputs are arXiv abstract/PDF URLs, ``arXiv:``-prefixed IDs,
    bare new-style (``2301.00001v2``) or old-style (``hep-th/9901001``) IDs,
    and DOIs. arXiv-issued DOIs (``10.48550/arXiv.<id>``) are decoded
    locally; any other DOI is resolved over the network through DOI content
    negotiation and the arXiv ID is read from the returned BibTeX record.

    Args:
        doi: The reference typed or pasted by the user. Surrounding
            whitespace is ignored.

    Returns:
        The arXiv identifier on success. On failure no exception is raised:
        one of the human-readable error messages used below is returned
        instead, so callers must be prepared to receive a message rather
        than an identifier.
    """
    invalid_link = "Invalid arXiv link. Please provide a link to the abstract page."
    doi_not_found = "No paper found with that DOI."

    doi = doi.strip()

    if doi.startswith(("https://", "http://")):
        # Drop any query string / fragment, then separate host from path.
        location = doi.split("://", 1)[1].split("?", 1)[0].split("#", 1)[0]
        host, _, path = location.partition("/")
        section, _, identifier = path.partition("/")
        # Keep everything after "abs/" or "pdf/" so that old-style IDs such
        # as "hep-th/9901001" retain their archive prefix.
        identifier = identifier.strip("/")
        if section == "pdf" and identifier.endswith(".pdf"):
            identifier = identifier[: -len(".pdf")]
        if host in ("arxiv.org", "www.arxiv.org") and section in ("abs", "pdf") and identifier:
            return identifier
        return invalid_link
    if doi.startswith("http"):
        return invalid_link

    if doi.lower().startswith("arxiv:"):
        return doi[len("arxiv:"):].strip()

    if doi.startswith("10."):
        # DOIs minted by arXiv embed the identifier directly; no lookup needed.
        arxiv_doi_prefix = "10.48550/arxiv."
        if doi.lower().startswith(arxiv_doi_prefix):
            return doi[len(arxiv_doi_prefix):]
        # Otherwise ask the DOI resolver for BibTeX and look for an eprint field.
        try:
            response = requests.get(
                "https://doi.org/" + doi,
                headers={"Accept": "application/x-bibtex"},
                timeout=10,  # never hang the caller on an unresponsive resolver
            )
        except requests.RequestException:
            return doi_not_found
        if response.status_code != 200:
            return doi_not_found
        _, marker, tail = response.text.rpartition("eprint = {arXiv:")
        if not marker:
            # The record exists but carries no arXiv identifier.
            return doi_not_found
        return tail.split("}")[0]

    # New-style identifier, optionally versioned: digits, dots and "v" only.
    if doi.replace("v", "").replace(".", "").isdigit():
        return doi

    # Old-style identifier: "<archive>[.<subject>]/<number>[v<version>]".
    archive, slash, number = doi.partition("/")
    if (
        slash
        and archive.replace("-", "").replace(".", "").isalpha()
        and number.replace("v", "").isdigit()
    ):
        return doi

    return "Invalid arXiv ID or DOI. Please provide a valid arXiv ID, DOI, or arXiv URL."
def retrieve_arxiv_paper(arxiv_id: str) -> dict:
    """Fetch the metadata of a single arXiv paper.

    Args:
        arxiv_id: The arXiv ID of the paper to retrieve.

    Returns:
        A dict with the keys ``arxiv_id``, ``title``, ``authors`` (list of
        author names), ``categories``, ``abstract`` and ``published_date``.

    Raises:
        ValueError: If the arXiv API returns no entry for ``arxiv_id``.
    """
    search = arxiv.Search(id_list=[arxiv_id])
    # A default for next() replaces the try/except around StopIteration and
    # keeps the iterator's internal exception out of the ValueError traceback.
    paper = next(arxiv_client.results(search), None)
    if paper is None:
        raise ValueError("No paper found with that arXiv ID.")
    return {
        # get_short_id() keeps the archive prefix of old-style identifiers
        # (e.g. "hep-th/9901001v1"), which a split on the last "/" would drop.
        "arxiv_id": paper.get_short_id(),
        "title": paper.title,
        "authors": [author.name for author in paper.authors],
        "categories": list(paper.categories),
        "abstract": paper.summary,
        "published_date": paper.published,
    }
def build_arxiv_category_query(
    categories: Union[str, list[str]],
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    start: int = 0,
    max_results: int = 5,
) -> arxiv.Search:
    """Build an ``arxiv.Search`` over categories and an optional date window.

    Args:
        categories: One arXiv category code or a list of them; multiple
            codes are OR-ed together.
        start_date: Optional lower bound on the submission date. When only
            this bound is given, the window ends at the current UTC time.
        end_date: Optional upper bound on the submission date. When only
            this bound is given, the window is open towards the past.
        start: Index of the first result to return. It is not stored in the
            returned Search; callers apply it as the ``offset`` when
            iterating results.
        max_results: Maximum number of results to return.

    Returns:
        arxiv.Search sorted by submission date, newest first.
    """
    if isinstance(categories, str):
        categories = [categories]

    def _stamp(moment: datetime) -> str:
        # Timezone-aware values are converted to UTC before formatting;
        # naive values are formatted as given.
        if moment.tzinfo is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.strftime("%Y%m%d%H%M")

    date_str = ""
    if start_date is not None or end_date is not None:
        # arXiv has accepted submissions since 1991, so this stamp serves as
        # an effectively open lower bound when only end_date is supplied.
        lower = _stamp(start_date) if start_date is not None else "199101010000"
        upper = _stamp(end_date if end_date is not None else datetime.now(timezone.utc))
        # NOTE(review): the literal "+TO+" separator is kept from the original
        # code; confirm it survives the client's URL encoding as intended.
        date_str = f"{lower}+TO+{upper}"

    # Construct the category string, including the date range if provided
    cat_str = " OR ".join(f"cat:{cat}" for cat in categories) if categories else ""
    if date_str:
        date_clause = f"submittedDate:[{date_str}]"
        # Without categories, emit the bare date clause rather than the
        # malformed "() AND ..." form.
        cat_str = f"({cat_str}) AND {date_clause}" if cat_str else date_clause

    return arxiv.Search(
        query=cat_str,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )
def retrieve_arxiv_papers(
    categories: Union[str, list[str]],
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    start: int = 0,
    max_results: int = 5,
) -> list[dict]:
    """Search arXiv for papers in the given categories.

    Args:
        categories: One arXiv category code or a list of them.
        start_date: Date to start searching from.
        end_date: Date to stop searching at.
        start: Index of the first result to return.
        max_results: Maximum number of results to return.

    Returns:
        A list of dicts, one per paper, with the keys ``arxiv_id``,
        ``title``, ``authors`` (list of author names), ``categories``,
        ``abstract`` and ``published_date``.
    """
    # arxiv.py counts the offset against Search.max_results, so the search
    # limit must cover the skipped entries as well; otherwise a page with
    # start > 0 comes back short (or empty once start >= max_results).
    search = build_arxiv_category_query(categories, start_date, end_date, start, start + max_results)

    papers: list[dict] = []
    for result in arxiv_client.results(search, offset=start):
        # Hard cap so the page size is respected whatever the client yields.
        if len(papers) >= max_results:
            break
        papers.append(
            {
                # get_short_id() keeps the archive prefix of old-style IDs
                # (e.g. "hep-th/9901001v1").
                "arxiv_id": result.get_short_id(),
                "title": result.title,
                "authors": [author.name for author in result.authors],
                "categories": list(result.categories),
                "abstract": result.summary,
                "published_date": result.published,
            }
        )
    return papers
def fetch_todays_papers(categories: Union[str, list[str]], start: int = 0, max_results: int = 5) -> list[dict]:
    """Fetch papers submitted since the most recent UTC midnight.

    Args:
        categories: One arXiv category code or a list of them.
        start: Index of the first result to return.
        max_results: Maximum number of results to return.

    Returns:
        The list of paper dicts produced by retrieve_arxiv_papers.
    """
    category_list = [categories] if isinstance(categories, str) else categories
    # "Today" begins at 00:00 UTC of the current day.
    midnight_utc = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
    return retrieve_arxiv_papers(
        category_list,
        start_date=midnight_utc,
        start=start,
        max_results=max_results,
    )
def fetch_24_hours_papers(categories: Union[str, list[str]], start: int = 0, max_results: int = 5) -> list[dict]:
    """Fetch papers submitted during the 24 hours before now (UTC).

    Args:
        categories: One arXiv category code or a list of them.
        start: Index of the first result to return.
        max_results: Maximum number of results to return.

    Returns:
        The list of paper dicts produced by retrieve_arxiv_papers.
    """
    category_list = [categories] if isinstance(categories, str) else categories
    window_start = datetime.now(timezone.utc) - timedelta(hours=24)
    return retrieve_arxiv_papers(
        category_list,
        start_date=window_start,
        start=start,
        max_results=max_results,
    )
def random_arxiv_category():
    """Return one display label picked at random from ARXIV_CATEGORIES_FLAT."""
    labels = [*ARXIV_CATEGORIES_FLAT.values()]
    return random.choice(labels)