from smolagents import CodeAgent, ToolCallingAgent, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from tools.user_input import UserInputTool
from kaggle.api.kaggle_api_extended import KaggleApi
import os
from Gradio_UI import GradioUI
# Fill in your Kaggle credentials here or, preferably, provide them as
# environment variables / Space secrets instead of hard-coding them.
os.environ['KAGGLE_USERNAME'] = ''
os.environ['KAGGLE_KEY'] = ''


def auth_kaggle() -> KaggleApi:
    """Authenticate with Kaggle and return the API object."""
    api = KaggleApi()
    try:
        api.authenticate()
    except Exception as e:
        # Raise instead of returning an error string so callers never try to
        # use a plain string as an API object.
        raise RuntimeError(f"Error authenticating with Kaggle: {str(e)}") from e
    return api
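
# Note: KaggleApi.authenticate() also picks up credentials from
# ~/.kaggle/kaggle.json, so the environment variables above are only one way
# to supply them.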


@tool
def search_kaggle_datasets(search_term: str,
                           max_results: int = 10
                           ) -> list[dict]:
    """Search for datasets on Kaggle based on a search term and return a list of dataset metadata.
    Args:
        search_term: The term to search for.
        max_results: Maximum number of results to return.
    """
    kaggle_api = auth_kaggle()
    # Search for datasets
    datasets = kaggle_api.dataset_list(search=search_term)
    # Limit the number of results
    datasets = datasets[:max_results]
    # Extract the relevant fields from the listed dataset objects
    # (attribute access; not every field is populated for every dataset)
    results = []
    for dataset in datasets:
        results.append({
            'title': getattr(dataset, 'title', None),
            'url': f"https://www.kaggle.com/{dataset.ref}",
            'size': getattr(dataset, 'size', None),
            'files': getattr(dataset, 'files', None),
            'last_updated': getattr(dataset, 'lastUpdated', None),
        })
    return results
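
# Example usage (hypothetical search term; actual results depend on Kaggle's
# live index): search_kaggle_datasets("air quality", max_results=3) returns a
# list of dicts with 'title', 'url', 'size', 'files' and 'last_updated' keys.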


@tool
def download_kaggle_dataset(
    dataset_ref: str,
    download_path: str,
    unzip: bool = True
) -> str:
    """Download a dataset from Kaggle.
    Args:
        dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
        download_path: The directory where the dataset will be downloaded.
        unzip: Whether to unzip the dataset after downloading. Default is True.
    """
    # Ensure the download path exists
    os.makedirs(download_path, exist_ok=True)
    kaggle_api = auth_kaggle()
    # Download the dataset
    kaggle_api.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)
    return f"Dataset '{dataset_ref}' downloaded to '{download_path}'."


final_answer = FinalAnswerTool()
user_input = UserInputTool()

# If the agent does not answer, the model is overloaded; use another model or
# the following Hugging Face endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
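
# prompts.yaml is expected to hold the prompt templates (e.g. the system prompt)
# passed to both agents below; the exact keys depend on the template file
# shipped with this Space.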

conversational_agent = ToolCallingAgent(
    model=model,
    tools=[user_input],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,  # planning disabled for this helper agent
    name='ask_question',
    description='Ask the user a question and return their answer',
    prompt_templates=prompt_templates,
    add_base_tools=True,
)

agent = CodeAgent(
    model=model,
    tools=[final_answer,
           search_kaggle_datasets,
           user_input,
           download_kaggle_dataset,
           image_generation_tool],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=2,
    name=None,
    description=None,
    managed_agents=[conversational_agent],
    prompt_templates=prompt_templates,
    additional_authorized_imports=['pandas',
                                   'matplotlib',
                                   'seaborn'],
    add_base_tools=True,
)
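
# The Gradio UI below is one way to drive the agent; for a headless run one
# could instead call, e.g. (hypothetical prompt):
#   agent.run("Find a Kaggle dataset about air quality, download it, and plot a quick summary.")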
GradioUI(agent).launch()