Spaces:
Sleeping
Sleeping
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool | |
from bs4 import BeautifulSoup | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import cosine_similarity | |
import datetime | |
import requests | |
import pytz | |
import yaml | |
from tools.final_answer import FinalAnswerTool | |
from Gradio_UI import GradioUI | |
def categorize_content(text, categories):
    """Categorizes text using NLP and TF-IDF similarity.

    The text and every category example are vectorized together with TF-IDF;
    the category whose example has the highest cosine similarity to the text
    is selected.

    Args:
        text: The text to classify.
        categories: A dictionary with category names as keys and example text as values.

    Returns:
        The name of the best-matching category, or "Uncategorized" when no
        categories are given, no usable terms can be extracted, or the text
        has zero similarity with every category example.
    """
    if not categories:
        # No categories means there is nothing to compare the text against.
        return "Uncategorized"

    category_names = list(categories)
    category_texts = list(categories.values())

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text, *category_texts])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when none of the
        # documents yields a token, e.g. all inputs are empty strings.
        return "Uncategorized"

    # Row 0 is the input text; the remaining rows are the category examples,
    # in the same order as category_names.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    if not similarities.any():
        return "Uncategorized"
    return category_names[similarities.argmax()]
# Registered with @tool so that CodeAgent receives a smolagents Tool object
# rather than a plain function.
@tool
def scrape_webpage(url: str, categories: dict = None) -> str:  # it's important to specify the return type
    # Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that scrapes a webpage and categorizes the content using NLP.

    Args:
        url: The address of the webpage to download and extract the visible text from.
        categories: A dictionary with category names as keys and example text as values.
    """
    # Only the network calls can raise requests.RequestException, so only
    # they live inside the try block.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error fetching webpage: {e}"

    # Collapse every non-empty text node of the page into one space-separated string.
    soup = BeautifulSoup(response.text, "html.parser")
    text_content = ' '.join(soup.stripped_strings)

    if not categories:
        return f"The following text content was scaped: {text_content}"

    category = categorize_content(text_content, categories)
    return f"The following text content {text_content} was scaped from {url} and categorized as: {category}"
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that reports the present local time for a named timezone.

    Any failure (for example an unknown timezone name) is reported as an
    error string instead of being raised.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the zone name, then format "now" as seen from that zone.
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        stamp = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as err:
        return f"Error fetching time for timezone '{timezone}': {err}"
    else:
        return f"The current local time in {timezone} is: {stamp}"
final_answer = FinalAnswerTool()

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub
# NOTE(review): this tool is loaded with trust_remote_code=True but is not
# passed to the agent below -- confirm whether it should be added to `tools`.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates explicitly as UTF-8 so that parsing does not
# depend on the platform's default text encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[final_answer, scrape_webpage],  # add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

# Start the Gradio web interface for the agent.
GradioUI(agent).launch()