mhattingpete committed on
Commit
3118bd6
·
verified ·
1 Parent(s): d638726

Added arXiv search tool

Browse files
Files changed (1) hide show
  1. app.py +66 -8
app.py CHANGED
@@ -3,20 +3,78 @@ import datetime
3
  import requests
4
  import pytz
5
  import yaml
6
- from tools.final_answer import FinalAnswerTool, VisitWebpageTool
 
7
 
8
  from Gradio_UI import GradioUI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Below is an example of a tool that does nothing. Amaze us with your creativity !
11
  @tool
12
def my_cutom_tool(arg1:str, arg2:int)-> str: # it's important to specify the return type
    # Placeholder example tool: it ignores both arguments and returns a fixed string.
    # Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that does nothing yet
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    return "What magic will you build ?"
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @tool
22
  def get_current_time_in_timezone(timezone: str) -> str:
 
3
  import requests
4
  import pytz
5
  import yaml
6
+ from tools.final_answer import FinalAnswerTool
7
+ from tools.visit_webpage import VisitWebpageTool
8
 
9
  from Gradio_UI import GradioUI
10
+ import arxiv
11
+ from transformers import pipeline
12
+
13
# Build the summarization pipeline once, at import time, so that every call to
# _summarize_text reuses the same object instead of constructing a new one.
# No model name is passed here, so transformers picks its default checkpoint
# for the "summarization" task.
summarizer = pipeline("summarization")
15
+
16
def _search_arxiv(query: str, max_results: int = 5) -> list[dict[str, str | list[str]]]:
    """
    Search for research articles on arXiv based on the given query.

    Results are requested sorted by submission date rather than by relevance.

    Args:
        query (str): The search query.
        max_results (int): Maximum number of results to retrieve.

    Returns:
        list[dict[str, str | list[str]]]: One dict per article with the keys
        'title', 'authors' (list of author names), 'summary' (the abstract),
        'published' (formatted as YYYY-MM-DD) and 'url' (the arXiv entry id).
    """
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    # Search.results() is deprecated in arxiv 2.x; results are fetched through
    # a Client instead, which is the supported entry point.
    client = arxiv.Client()
    return [
        {
            'title': result.title,
            'authors': [author.name for author in result.authors],
            'summary': result.summary,
            'published': result.published.strftime("%Y-%m-%d"),
            'url': result.entry_id,
        }
        for result in client.results(search)
    ]
42
+
43
+ def _summarize_text(text: str) -> str:
44
+ """
45
+ Summarize the provided text using the Hugging Face summarization pipeline.
46
+
47
+ Args:
48
+ text (str): The text to summarize.
49
+
50
+ Returns:
51
+ str: The summarized text.
52
+ """
53
+ # For longer texts, consider chunking before summarizing.
54
+ summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
55
+ return summary[0]['summary_text']
56
+
57
 
 
58
  @tool
59
def personalized_research_assistant(query: str) -> str:
    """A tool that fetches relevant articles from arxiv and provides the information.

    Args:
        query: The research query to search for in arxiv.
    """
    # NOTE(review): the docstring above is presumably consumed by the @tool
    # decorator as the tool description, so its wording is left untouched.
    articles = _search_arxiv(query)
    if not articles:
        # Give the caller an explicit message rather than an empty string.
        return f"No articles found on arXiv for query: {query}"

    divider = "-" * 80
    # One formatted section per article. Each section starts with a newline and
    # ends with the divider (no trailing newline), so sections can be joined
    # directly without a separator.
    sections = [
        f"\nArticle {idx}:\n"
        f"\nTitle: {article['title']}\n"
        f"Authors: {', '.join(article['authors'])}\n"
        f"Published on: {article['published']}\n"
        f"URL: {article['url']}\n"
        "Abstract Summary:\n"
        # The helper is named _summarize_text; calling summarize_text (without
        # the underscore) raised NameError.
        f"{_summarize_text(article['summary'])}\n"
        f"{divider}"
        for idx, article in enumerate(articles, start=1)
    ]
    return "".join(sections)
77
+
78
 
79
  @tool
80
  def get_current_time_in_timezone(timezone: str) -> str: