PapersChat / toolsFunctions.py
as-cle-bert's picture
Upload 4 files
6ca31d3 verified
raw
history blame
2.99 kB
import urllib, urllib.request
from pydantic import Field
from datetime import datetime
from markitdown import MarkItDown
from Bio import Entrez
import xml.etree.ElementTree as ET
# Module-level MarkItDown converter; arxiv_tool uses it to convert the saved
# ArXiv XML response file into text.
md = MarkItDown()
def format_today():
    """Return the current local time and the same moment two years earlier.

    Both values are 12-digit ``YYYYMMDDHHMM`` strings with zero-padded month,
    day, hour and minute.

    Returns:
        tuple[str, str]: ``(today, two_years_ago)``.
    """
    now = datetime.now()
    # strftime zero-pads every field. The previous hand-rolled padding used
    # the hour in place of the minute whenever the minute was below 10.
    month_to_minute = now.strftime("%m%d%H%M")
    today = f"{now.year}{month_to_minute}"
    # Only the year is shifted; month, day, hour and minute are reused as-is
    # (so 29 February stays "0229" in the earlier stamp, as it always did).
    two_years_ago = f"{now.year - 2}{month_to_minute}"
    return today, two_years_ago
def arxiv_tool(search_query: str = Field(description="The query with which to search ArXiv database")):
    """A tool to search ArXiv

    Queries the ArXiv export API for at most three entries matching
    ``search_query`` in all fields, restricted to submissions from the last
    two years. The raw response is saved to ``arxiv_results.xml`` in the
    current working directory and converted to text with MarkItDown.

    Args:
        search_query: Free-text query to look up on ArXiv.

    Returns:
        The text content MarkItDown extracts from the saved response.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
    """
    from urllib.parse import quote_plus

    today, two_years_ago = format_today()
    # quote_plus turns spaces into "+" (as before) and also escapes characters
    # such as "&" or "#" that would otherwise corrupt the query string.
    query = quote_plus(search_query)
    # The date range has to be part of search_query, joined with AND; as a
    # separate "&submittedDate:..." parameter it is not part of the search
    # expression. %28/%29 are parentheses that keep a multi-word query
    # grouped ahead of the AND.
    url = (
        "http://export.arxiv.org/api/query"
        f"?search_query=%28all:{query}%29+AND+submittedDate:[{two_years_ago}+TO+{today}]"
        "&start=0&max_results=3"
    )
    with urllib.request.urlopen(url) as response:
        content = response.read().decode("utf-8")
    # Write with the same encoding the payload was decoded with, independent
    # of the platform's default locale encoding.
    with open("arxiv_results.xml", "w", encoding="utf-8") as xml_file:
        xml_file.write(content)
    result = md.convert("arxiv_results.xml")
    return result.text_content
def search_pubmed(query):
    """Search PubMed and return the IDs of up to three matching records.

    Args:
        query: Search term passed to ``Entrez.esearch``.

    Returns:
        The ``"IdList"`` entry of the parsed search response.
    """
    Entrez.email = "[email protected]"  # Replace with your email
    handle = Entrez.esearch(db="pubmed", term=query, retmax=3)
    try:
        record = Entrez.read(handle)
    finally:
        # Release the handle even when parsing the response fails.
        handle.close()
    return record["IdList"]
def fetch_pubmed_details(pubmed_ids):
    """Download the given PubMed records as XML into ``biomed_results.xml``.

    The file is written to the current working directory, overwriting any
    previous content. Nothing is returned.

    Args:
        pubmed_ids: Record identifiers passed to ``Entrez.efetch`` as ``id``.
    """
    Entrez.email = "[email protected]"  # Replace with your email
    handle = Entrez.efetch(db="pubmed", id=pubmed_ids, rettype="medline", retmode="xml")
    try:
        records = handle.read()
    finally:
        # Release the handle even when reading the response fails.
        handle.close()
    # Decode byte payloads as UTF-8; pass through unchanged if the handle
    # already produced text.
    recs = records.decode("utf-8") if isinstance(records, bytes) else records
    # Explicit UTF-8 so the bytes on disk do not depend on the locale default.
    with open("biomed_results.xml", "w", encoding="utf-8") as xml_file:
        xml_file.write(recs)
def _element_text(element):
    """Return all text inside ``element``, including text of nested tags."""
    return "".join(element.itertext()).strip()


def fetch_xml():
    """Render the articles stored in ``biomed_results.xml`` as Markdown.

    Each ``PubmedArticle`` under the document root becomes a ``## title``
    heading followed by its abstract. Every ``AbstractText`` section is
    included; a section with a ``Label`` attribute is prefixed with
    ``"<label>: "``. Missing or empty titles and abstracts are rendered as
    ``"No title"`` and ``"No abstract"``.

    Returns:
        str: The formatted articles joined by blank lines; an empty string
        when the file contains no ``PubmedArticle`` element.
    """
    root = ET.parse("biomed_results.xml").getroot()
    parsed_articles = []
    for article in root.findall('PubmedArticle'):
        # itertext() is used instead of .text because .text stops at the
        # first inline child tag (<i>, <sub>, ...) and may be None.
        title = article.find('.//ArticleTitle')
        title_text = (_element_text(title) if title is not None else "") or "No title"

        # An abstract may be split into several AbstractText sections.
        sections = []
        for part in article.findall('.//Abstract/AbstractText'):
            text = _element_text(part)
            if not text:
                continue
            label = part.get("Label")
            sections.append(f"{label}: {text}" if label else text)
        abstract_text = "\n\n".join(sections) or "No abstract"

        parsed_articles.append(f"## {title_text}\n\n**Abstract**:\n\n{abstract_text}")
    return "\n\n".join(parsed_articles)
def pubmed_tool(search_query: str = Field(description="The query with which to search PubMed database")):
    """A tool to search PubMed

    Looks up the query with search_pubmed; when IDs come back, downloads the
    records with fetch_pubmed_details and returns the Markdown produced by
    fetch_xml. Otherwise returns a fixed "no match" message.
    """
    matching_ids = search_pubmed(search_query)
    if matching_ids:
        fetch_pubmed_details(matching_ids)
        return fetch_xml()
    return "There is no significant match in PubMed"