mhattingpete's picture
Added arXiv search tool
3118bd6 verified
raw
history blame
3.99 kB
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
import arxiv
from transformers import pipeline
# Build the Hugging Face summarization pipeline once, at module import, so
# every later call reuses the same loaded model. No model id is passed, so
# the library picks its default checkpoint for the "summarization" task.
summarizer = pipeline(task="summarization")
def _search_arxiv(query: str, max_results: int = 5) -> list[dict[str, str | list[str]]]:
    """Search arXiv and return metadata for the matching articles.

    Results are ordered by submission date rather than by relevance.

    Args:
        query (str): The search query, passed to arXiv unchanged.
        max_results (int): Maximum number of results to retrieve.

    Returns:
        list[dict[str, str | list[str]]]: One dict per article with the keys
        'title', 'authors' (list of author names), 'summary' (the abstract),
        'published' (publication date formatted as YYYY-MM-DD) and 'url'
        (the result's arXiv entry id).
    """
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    # Search.results() is deprecated in arxiv 2.x; running the search through
    # a Client is the supported path and yields the same Result objects.
    client = arxiv.Client()
    return [
        {
            'title': result.title,
            'authors': [author.name for author in result.authors],
            'summary': result.summary,
            'published': result.published.strftime("%Y-%m-%d"),
            'url': result.entry_id,
        }
        for result in client.results(search)
    ]
def _summarize_text(text: str) -> str:
"""
Summarize the provided text using the Hugging Face summarization pipeline.
Args:
text (str): The text to summarize.
Returns:
str: The summarized text.
"""
# For longer texts, consider chunking before summarizing.
summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
return summary[0]['summary_text']
@tool
def personalized_research_assistant(query: str) -> str:
    """A tool that fetches relevant articles from arxiv and provides the information.
    Args:
        query: The research query to search for in arxiv.
    """
    # NOTE: the docstring above is consumed by the @tool decorator as the
    # tool's description and argument documentation, so its layout is kept
    # in the "summary line + Args:" form.
    articles = _search_arxiv(query)
    if not articles:
        # Give the agent an explicit signal instead of an empty string.
        return f"No arXiv articles found for query: {query}"

    sections = []
    for idx, article in enumerate(articles, start=1):
        sections.append(
            f"\nArticle {idx}:\n"
            f"\nTitle: {article['title']}\n"
            f"Authors: {', '.join(article['authors'])}\n"
            f"Published on: {article['published']}\n"
            f"URL: {article['url']}\n"
            "Abstract Summary:\n"
            # The helper is named _summarize_text; the previous call to
            # summarize_text raised NameError on the first article.
            f"{_summarize_text(article['summary'])}\n"
            + "-" * 80
        )
    # Each section starts with a newline, so plain concatenation keeps the
    # separator line and the next "Article N:" header on separate lines.
    return "".join(sections)
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the zone name and read the clock in that zone in one step.
        now_in_zone = datetime.datetime.now(pytz.timezone(timezone))
        # The format spec after the colon is applied via strftime.
        return f"The current local time in {timezone} is: {now_in_zone:%Y-%m-%d %H:%M:%S}"
    except Exception as err:
        # Any failure (e.g. an unknown zone name) is reported back as text so
        # the caller always receives a string.
        return f"Error fetching time for timezone '{timezone}': {err}"
# Tool the agent calls to deliver its final answer.
final_answer = FinalAnswerTool()

# Chat model served from a dedicated Hugging Face inference endpoint.
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='https://wxknx1kg971u7k1n.us-east-1.aws.endpoints.huggingface.cloud',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Prompt templates are kept in a YAML file next to this script; read it as
# UTF-8 explicitly so the result does not depend on the platform's locale.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,  # don't remove final answer
        image_generation_tool,
        DuckDuckGoSearchTool(),
        VisitWebpageTool(),
        get_current_time_in_timezone,
        # The arXiv research tool is defined in this file but was never
        # registered, so the agent could not call it.
        personalized_research_assistant,
    ],  # add your tools here
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Start the Gradio web UI for the agent.
GradioUI(agent).launch()