First_agent_template

Sleeping

App Files Files Community

thivy committed on Feb 13

Commit

abb8566

1 Parent(s): 3d1237b

feat: :sparkles: add scraper as tool

Browse files

Files changed (1) hide show

app.py +32 -7

app.py CHANGED Viewed

@@ -1,4 +1,9 @@
 from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
 import datetime
 import requests
 import pytz
@@ -7,16 +12,36 @@ from tools.final_answer import FinalAnswerTool
 from Gradio_UI import GradioUI
-# Below is an example of a tool that does nothing. Amaze us with your creativity !
 @tool
-def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
     #Keep this format for the description / args / args description but feel free to modify the tool
-    """A tool that does nothing yet
     Args:
-        arg1: the first argument
-        arg2: the second argument
     """
-    return "What magic will you build ?"
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
@@ -51,7 +76,7 @@ with open("prompts.yaml", 'r') as stream:
 agent = CodeAgent(
     model=model,
-    tools=[final_answer], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,

 from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
+from bs4 import BeautifulSoup
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
 import datetime
 import requests
 import pytz
 from Gradio_UI import GradioUI
def categorize_content(text: str, categories: dict) -> str:
    """Pick the category whose example text is most similar to ``text``.

    The text and every category's example text are vectorized together with
    TF-IDF, and the category with the highest cosine similarity to ``text``
    wins.

    Args:
        text: The text to categorize.
        categories: A dictionary with category names as keys and example text
            as values.

    Returns:
        The name of the best-matching category, or "Uncategorized" when
        ``categories`` is empty or None, when no usable vocabulary can be
        built, or when no category shares any term with ``text``.
    """
    # With nothing to compare against, cosine_similarity would be handed a
    # zero-row matrix and raise; fall back before doing any work.
    if not categories:
        return "Uncategorized"

    category_names = list(categories)
    category_texts = list(categories.values())

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text, *category_texts])
    except ValueError:
        # scikit-learn raises ValueError when no document yields a single
        # token (empty vocabulary), e.g. a blank page and blank examples.
        return "Uncategorized"

    # Row 0 is the scraped text; the remaining rows are the category examples,
    # in the same order as category_names.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    if not similarities.any():
        return "Uncategorized"
    return category_names[similarities.argmax()]
 @tool
def scrape_webpage(url: str, categories: dict = None) -> str:  # it's important to specify the return type
    # Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that scrapes the text content of a webpage and optionally categorizes it using NLP.
    Args:
        url: The URL of the webpage to scrape.
        categories: A dictionary with category names as keys and example text as values. If omitted, the text is returned without a category.
    """
    # Only the network calls can raise RequestException, so keep the try
    # body limited to them.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error fetching webpage: {e}"

    soup = BeautifulSoup(response.text, "html.parser")
    # Script and style bodies are code, not page text; drop them so they do
    # not pollute the scraped content or skew the categorization.
    for element in soup(["script", "style"]):
        element.decompose()
    text_content = " ".join(soup.stripped_strings)

    if not categories:
        return f"The following text content was scraped: {text_content}"

    category = categorize_content(text_content, categories)
    return f"The following text content {text_content} was scraped from {url} and categorized as: {category}"
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
 agent = CodeAgent(
     model=model,
+    tools=[final_answer, scrape_webpage], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,