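# Web file-viewer residue captured together with the source (not part of the program):
#   lokami's picture
#   Add search_kaggle_datasets
#   a7dc99b verified
#   raw / history / blame
#   4.2 kB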
import datetime
import json
import os
from typing import Optional

import pytz
import requests
import yaml
from kaggle.api.kaggle_api_extended import KaggleApi
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool

from Gradio_UI import GradioUI
from tools.final_answer import FinalAnswerTool
# Below is an example of a tool that does nothing. Amaze us with your creativity !
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # the return type annotation is important
    # Keep the description / Args / per-argument layout of the docstring below;
    # the wording and the body are free to change.
    """A tool that does nothing yet
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # Neither argument is used: this is a placeholder that always answers
    # with the same fixed sentence.
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply
def _dataset_field(dataset, *names, default=None):
    """Return the first non-None value stored under any of *names*.

    Each name is looked up as a mapping key when *dataset* is a dict and as an
    attribute otherwise, so the caller does not depend on one record shape.
    """
    for name in names:
        if isinstance(dataset, dict):
            value = dataset.get(name)
        else:
            value = getattr(dataset, name, None)
        if value is not None:
            return value
    return default


@tool
def search_kaggle_datasets(
    search_term: str,
    kaggle_username: Optional[str] = None,
    kaggle_key: Optional[str] = None,
    max_results: int = 10,
) -> str:
    """Search for datasets on Kaggle based on a search term.

    The result is a JSON-formatted list with one entry per dataset (title, url,
    size, files, last_updated). Any failure is reported as a text starting with
    'Error in searching Kaggle datasets:' instead of raising.

    Args:
        search_term: The term to search for.
        kaggle_username: Your Kaggle username.
        kaggle_key: Your Kaggle API key.
        max_results: Maximum number of results to return (defaults to 10).
    """
    # Both credentials are mandatory. The None defaults only let a caller omit
    # them and get a readable error back instead of a TypeError.
    if not (kaggle_username and kaggle_key):
        return 'Error in searching Kaggle datasets: No username or key provided.'

    # Hand the credentials to the Kaggle client through its environment
    # variables rather than by writing ~/.kaggle/kaggle.json: that file may
    # already hold the user's own credentials and must not be overwritten or
    # deleted by a single search.
    credentials = {"KAGGLE_USERNAME": kaggle_username, "KAGGLE_KEY": kaggle_key}
    previous = {name: os.environ.get(name) for name in credentials}
    try:
        os.environ.update(credentials)

        api = KaggleApi()
        api.authenticate()

        datasets = api.dataset_list(search=search_term) or []
        # A negative limit would slice from the end of the list; treat it as 0.
        limit = max(max_results, 0)

        results = []
        for dataset in datasets[:limit]:
            # NOTE(review): field spelling (camelCase vs snake_case) appears to
            # differ between kaggle client releases, so several spellings are
            # tried -- confirm against the installed version.
            ref = _dataset_field(dataset, "ref", default="")
            results.append({
                'title': _dataset_field(dataset, "title"),
                'url': _dataset_field(
                    dataset, "url", default=f"https://www.kaggle.com/{ref}"
                ),
                'size': _dataset_field(dataset, "size", "total_bytes", "totalBytes"),
                'files': _dataset_field(dataset, "files", default=[]),
                'last_updated': _dataset_field(dataset, "lastUpdated", "last_updated"),
            })

        if not results:
            return f"No Kaggle datasets found for '{search_term}'."
        # default=str is deliberate: records may carry datetimes or client
        # objects (e.g. file entries) that json cannot encode natively.
        return json.dumps(results, indent=2, ensure_ascii=False, default=str)
    except Exception as exc:
        # Tool boundary: the agent consumes text, so report instead of raising.
        return f"Error in searching Kaggle datasets: {exc}"
    finally:
        # Always put the process environment back exactly as it was found.
        for name, value in previous.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # Any failure (for instance a name pytz cannot resolve) is turned into an
    # explanatory sentence rather than propagated to the caller.
    try:
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        stamp = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as exc:
        return f"Error fetching time for timezone '{timezone}': {exc!s}"
    return f"The current local time in {timezone} is: {stamp}"
# --- Application wiring: build the tools, the model and the agent, then serve the UI ---

final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded; use another model or the
# following Hugging Face Endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub.
# NOTE(security): trust_remote_code=True allows code fetched from the Hub to run
# in this process; only keep it for repositories you trust.
# This tool is loaded but not handed to the agent below.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the locale of the machine running the app.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    # Register the tools defined in this file; a tool that is only defined but
    # not listed here is invisible to the agent. (Don't remove final_answer.)
    tools=[final_answer, search_kaggle_datasets, get_current_time_in_timezone],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

GradioUI(agent).launch()