Spaces:
Sleeping
Sleeping
File size: 3,218 Bytes
9b5b26a abb8566 9b5b26a c19d193 6aae614 8fe992b 9b5b26a abb8566 9b5b26a abb8566 9b5b26a abb8566 9b5b26a abb8566 9b5b26a abb8566 9b5b26a 8c01ffb 6aae614 e121372 bf6d34c 29ec968 fe328e0 13d500a 8c01ffb 9b5b26a 8c01ffb 861422e 9b5b26a 8c01ffb 8fe992b abb8566 8c01ffb 861422e 8fe992b 9b5b26a 8c01ffb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
def categorize_content(text: str, categories: dict) -> str:
    """Pick the category whose example text is most similar to ``text``.

    ``text`` and every category example are vectorized together with TF-IDF,
    and the category with the highest cosine similarity to ``text`` wins.

    Args:
        text: The text to categorize.
        categories: Mapping of category name to example text for that
            category.

    Returns:
        The name of the best-matching category, or ``"Uncategorized"`` when
        there is nothing to compare (no categories, blank text, no usable
        tokens) or when every similarity score is zero.
    """
    # Guard the degenerate inputs up front: with no categories the similarity
    # matrix would be empty, and blank text can never match anything.
    if not categories or not text or not text.strip():
        return "Uncategorized"

    category_names = list(categories)
    category_texts = list(categories.values())

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text] + category_texts)
    except ValueError:
        # scikit-learn raises ValueError when no document yields a token
        # (empty vocabulary); treat that the same as "no match".
        return "Uncategorized"

    # Row 0 is ``text``; the remaining rows line up with ``category_names``.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    if not similarities.any():
        return "Uncategorized"
    return category_names[similarities.argmax()]
@tool
def scrape_webpage(url: str, categories: dict = None) -> str:  # it's important to specify the return type
    # Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that scrapes the text content of a webpage and optionally categorizes it using NLP.
    Args:
        url: The full URL of the webpage to scrape (e.g., 'https://example.com').
        categories: An optional dictionary with category names as keys and example text as values. When omitted, the scraped text is returned without a category.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        html = response.text
    except requests.RequestException as e:
        # Network failures and non-2xx responses are reported back to the
        # caller as text rather than raised.
        return f"Error fetching webpage: {e}"

    soup = BeautifulSoup(html, "html.parser")
    text_content = " ".join(soup.stripped_strings)

    if not categories:
        return f"The following text content was scraped: {text_content}"

    try:
        category = categorize_content(text_content, categories)
    except ValueError:
        # The TF-IDF step can reject input that yields no usable tokens;
        # fall back to the same label used when nothing matches.
        category = "Uncategorized"
    return f"The following text content {text_content} was scraped from {url} and categorized as: {category}"
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    try:
        # Resolve the zone name, then read the clock in that zone.
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        formatted = now_in_zone.strftime(timestamp_format)
    except Exception as exc:
        # Any failure is reported back as text instead of being raised.
        return f"Error fetching time for timezone '{timezone}': {exc!s}"
    return f"The current local time in {timezone} is: {formatted}"
# Final-answer tool; it must stay in the agent's tool list (see below).
final_answer = FinalAnswerTool()

# Model configuration used by the agent.
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub
# NOTE(review): this tool is loaded but never passed to the agent below;
# either add it to `tools` or drop the load.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default locale.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# NOTE(review): get_current_time_in_timezone is defined in this file but is
# not registered here; confirm whether that is intentional.
agent = CodeAgent(
    model=model,
    tools=[final_answer, scrape_webpage],  ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

GradioUI(agent).launch()