lokami committed on
Commit
156c068
·
1 Parent(s): a7dc99b

Add search_kaggle_datasets, download_kaggle_dataset and authorize the use of datasci libs

Browse files
Files changed (2) hide show
  1. Gradio_UI.py +1 -0
  2. app.py +53 -15
Gradio_UI.py CHANGED
@@ -13,6 +13,7 @@
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
 
16
  import mimetypes
17
  import os
18
  import re
 
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
16
+ # pandas transformers sklearn smolagents matplotlib seaborn
17
  import mimetypes
18
  import os
19
  import re
app.py CHANGED
@@ -4,8 +4,10 @@ import requests
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
 
7
 
8
  from kaggle.api.kaggle_api_extended import KaggleApi
 
9
 
10
  from Gradio_UI import GradioUI
11
 
@@ -21,7 +23,7 @@ def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return
21
  return "What magic will you build ?"
22
 
23
  @tool
24
- def search_kaggle_datasets(search_term:str, kaggle_username=None:str, kaggle_key=None:str, max_results:int)-> str:
25
  """Search for datasets on Kaggle based on a search term.
26
  Args:
27
  search_term: The term to search for.
@@ -45,8 +47,11 @@ def search_kaggle_datasets(search_term:str, kaggle_username=None:str, kaggle_key
45
  # Use the default kaggle.json file if no credentials are provided
46
  return 'Error in searching Kaggle datasets: No username or key provided.'
47
 
48
- api.authenticate()
49
-
 
 
 
50
  # Search for datasets
51
  datasets = api.dataset_list(search=search_term)
52
 
@@ -71,22 +76,54 @@ def search_kaggle_datasets(search_term:str, kaggle_username=None:str, kaggle_key
71
 
72
  return results
73
 
74
-
75
  @tool
76
- def get_current_time_in_timezone(timezone: str) -> str:
77
- """A tool that fetches the current local time in a specified timezone.
 
 
 
 
 
 
78
  Args:
79
- timezone: A string representing a valid timezone (e.g., 'America/New_York').
 
 
 
 
80
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  try:
82
- # Create timezone object
83
- tz = pytz.timezone(timezone)
84
- # Get current time in that timezone
85
- local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
86
- return f"The current local time in {timezone} is: {local_time}"
87
  except Exception as e:
88
- return f"Error fetching time for timezone '{timezone}': {str(e)}"
 
 
 
 
 
 
 
 
 
 
89
 
 
90
 
91
  final_answer = FinalAnswerTool()
92
 
@@ -109,14 +146,15 @@ with open("prompts.yaml", 'r') as stream:
109
 
110
  agent = CodeAgent(
111
  model=model,
112
- tools=[final_answer], ## add your tools here (don't remove final answer)
113
  max_steps=6,
114
  verbosity_level=1,
115
  grammar=None,
116
  planning_interval=None,
117
  name=None,
118
  description=None,
119
- prompt_templates=prompt_templates
 
120
  )
121
 
122
 
 
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
7
+ # from typing import Optional
8
 
9
  from kaggle.api.kaggle_api_extended import KaggleApi
10
+ import os
11
 
12
  from Gradio_UI import GradioUI
13
 
 
23
  return "What magic will you build ?"
24
 
25
  @tool
26
+ def search_kaggle_datasets(search_term:str, kaggle_username:str = None, kaggle_key:str = None, max_results:int = 10)-> list[dict[str]]:
27
  """Search for datasets on Kaggle based on a search term.
28
  Args:
29
  search_term: The term to search for.
 
47
  # Use the default kaggle.json file if no credentials are provided
48
  return 'Error in searching Kaggle datasets: No username or key provided.'
49
 
50
+ try:
51
+ api.authenticate()
52
+ except Exception as e:
53
+ return f"Error authenticating with Kaggle: {str(e)}"
54
+
55
  # Search for datasets
56
  datasets = api.dataset_list(search=search_term)
57
 
 
76
 
77
  return results
78
 
 
79
  @tool
80
+ def download_kaggle_dataset(
81
+ dataset_ref: str,
82
+ download_path: str,
83
+ kaggle_username: str = None,
84
+ kaggle_key: str = None,
85
+ unzip: bool = True
86
+ ) -> str:
87
+ """Download a dataset from Kaggle.
88
  Args:
89
+ dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
90
+ download_path: The directory where the dataset will be downloaded.
91
+ kaggle_username: Your Kaggle username (from kaggle.json).
92
+ kaggle_key: Your Kaggle API key (from kaggle.json).
93
+ unzip: Whether to unzip the dataset after downloading. Default is True.
94
  """
95
+ # Initialize the Kaggle API
96
+ api = KaggleApi()
97
+
98
+ # Authenticate using provided credentials
99
+ if kaggle_username and kaggle_key:
100
+ # Create a temporary kaggle.json file
101
+ kaggle_json_content = f'{{"username":"{kaggle_username}","key":"{kaggle_key}"}}'
102
+ kaggle_json_path = os.path.expanduser("~/.kaggle/kaggle.json")
103
+ os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)
104
+ with open(kaggle_json_path, "w") as f:
105
+ f.write(kaggle_json_content)
106
+ os.chmod(kaggle_json_path, 0o600) # Set permissions to read/write for the owner only
107
+ else:
108
+ # Use the default kaggle.json file if no credentials are provided
109
+ pass
110
+
111
  try:
112
+ api.authenticate()
 
 
 
 
113
  except Exception as e:
114
+ return f"Error authenticating with Kaggle: {str(e)}"
115
+
116
+ # Ensure the download path exists
117
+ os.makedirs(download_path, exist_ok=True)
118
+
119
+ # Download the dataset
120
+ api.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)
121
+
122
+ # Clean up the temporary kaggle.json file if it was created
123
+ if kaggle_username and kaggle_key:
124
+ os.remove(kaggle_json_path)
125
 
126
+ return f"Dataset '{dataset_ref}' downloaded to '{download_path}'."
127
 
128
  final_answer = FinalAnswerTool()
129
 
 
146
 
147
  agent = CodeAgent(
148
  model=model,
149
+ tools=[final_answer, search_kaggle_datasets, download_kaggle_dataset], ## add your tools here (don't remove final answer)
150
  max_steps=6,
151
  verbosity_level=1,
152
  grammar=None,
153
  planning_interval=None,
154
  name=None,
155
  description=None,
156
+ prompt_templates=prompt_templates,
157
+ additional_authorized_imports=['pandas', 'matplotlib', 'seaborn'],
158
  )
159
 
160