lokami committed on
Commit
a7dc99b
·
verified ·
1 Parent(s): ae7a494

Add search_kaggle_datasets

Browse files
Files changed (1) hide show
  1. app.py +55 -1
app.py CHANGED
@@ -1,10 +1,12 @@
1
- from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
2
  import datetime
3
  import requests
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
7
 
 
 
8
  from Gradio_UI import GradioUI
9
 
10
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
@@ -18,6 +20,58 @@ def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return
18
  """
19
  return "What magic will you build ?"
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  @tool
22
  def get_current_time_in_timezone(timezone: str) -> str:
23
  """A tool that fetches the current local time in a specified timezone.
 
1
+ from smolagents import CodeAgent,DuckDuckGoSearchTool,HfApiModel,load_tool,tool
2
  import datetime
3
  import requests
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
7
 
8
+ from kaggle.api.kaggle_api_extended import KaggleApi
9
+
10
  from Gradio_UI import GradioUI
11
 
12
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
 
20
  """
21
  return "What magic will you build ?"
22
 
23
@tool
def search_kaggle_datasets(search_term: str, kaggle_username: str = None, kaggle_key: str = None, max_results: int = 10) -> str:
    """Search for datasets on Kaggle based on a search term.

    The result is a JSON-encoded list with one object per dataset (keys:
    title, url, size, files, last_updated). On failure, a message starting
    with "Error in searching Kaggle datasets:" is returned instead.

    Args:
        search_term: The term to search for.
        kaggle_username: Your Kaggle username.
        kaggle_key: Your Kaggle API key.
        max_results: Maximum number of results to return (defaults to 10).
    """
    # Imported locally so the tool does not rely on module-level imports
    # that the file does not provide.
    import json
    import os

    if not (kaggle_username and kaggle_key):
        return 'Error in searching Kaggle datasets: No username or key provided.'

    # Hand the credentials to the Kaggle client through its environment
    # variables instead of overwriting (and later deleting) the user's own
    # ~/.kaggle/kaggle.json. Previous values are restored in `finally`.
    credentials = {"KAGGLE_USERNAME": kaggle_username, "KAGGLE_KEY": kaggle_key}
    previous = {name: os.environ.get(name) for name in credentials}
    os.environ.update(credentials)

    try:
        api = KaggleApi()
        api.authenticate()

        datasets = api.dataset_list(search=search_term) or []

        results = []
        for dataset in datasets[:max(max_results, 0)]:
            # NOTE(review): the fields are read straight off the search
            # results rather than via one extra dataset_view() call per hit;
            # attribute names are assumed to match the keys the original code
            # used -- confirm against the installed kaggle client version.
            last_updated = getattr(dataset, "lastUpdated", None)
            if last_updated is None:
                last_updated = getattr(dataset, "last_updated", None)
            ref = getattr(dataset, "ref", None)
            results.append({
                'title': getattr(dataset, "title", None),
                'url': f"https://www.kaggle.com/{ref}",
                'size': getattr(dataset, "size", None),
                'files': [str(item) for item in (getattr(dataset, "files", None) or [])],
                'last_updated': last_updated,
            })
    except Exception as exc:
        # Tool boundary: report the failure to the agent as text rather than
        # crashing the agent run.
        return f"Error in searching Kaggle datasets: {exc}"
    finally:
        # Always put the process environment back, even if the API call failed.
        for name, value in previous.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value

    # default=str is deliberate: timestamps and other client objects are
    # rendered as text so the declared `str` return type is honoured.
    return json.dumps(results, default=str)
73
+
74
+
75
  @tool
76
  def get_current_time_in_timezone(timezone: str) -> str:
77
  """A tool that fetches the current local time in a specified timezone.