Seb1101 committed on
Commit
ad1be7f
·
verified ·
1 Parent(s): 89f9f33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -202
app.py CHANGED
@@ -3,14 +3,7 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import re
7
- import json
8
- import urllib.parse
9
- from bs4 import BeautifulSoup
10
- import numpy as np
11
- import sympy as sp
12
- from datetime import datetime, timedelta
13
- import dateutil.parser
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
@@ -18,187 +11,10 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
  # --- GAIA Agent Definition ---
20
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
21
- class GaiaAgent:
22
- def __init__(self):
23
- print("GaiaAgent initialized.")
24
- self.session = requests.Session()
25
- self.session.headers.update({
26
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
27
- })
28
-
29
- def search_web(self, query, max_results=3):
30
- """Perform web search using DuckDuckGo instant answers or basic search"""
31
- try:
32
- # Try DuckDuckGo instant answer API first
33
- ddg_url = f"https://api.duckduckgo.com/?q={urllib.parse.quote(query)}&format=json&no_html=1&skip_disambig=1"
34
- response = self.session.get(ddg_url, timeout=10)
35
- if response.status_code == 200:
36
- data = response.json()
37
- if data.get('AbstractText'):
38
- return data['AbstractText']
39
- if data.get('Answer'):
40
- return data['Answer']
41
-
42
- # Fallback to basic web scraping (limited)
43
- search_url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
44
- response = self.session.get(search_url, timeout=10)
45
- if response.status_code == 200:
46
- soup = BeautifulSoup(response.text, 'html.parser')
47
- results = soup.find_all('a', class_='result__snippet', limit=max_results)
48
- if results:
49
- return " ".join([r.get_text().strip() for r in results])
50
-
51
- return f"Unable to search for: {query}"
52
- except Exception as e:
53
- return f"Search error: {str(e)}"
54
-
55
- def calculate_math(self, expression):
56
- """Safely evaluate mathematical expressions"""
57
- try:
58
- # Clean the expression
59
- expression = re.sub(r'[^0-9+\-*/().\s]', '', expression)
60
- # Use sympy for safe evaluation
61
- result = sp.sympify(expression).evalf()
62
- return str(result)
63
- except Exception as e:
64
- return f"Math error: {str(e)}"
65
-
66
- def parse_date(self, date_string):
67
- """Parse various date formats"""
68
- try:
69
- parsed_date = dateutil.parser.parse(date_string)
70
- return parsed_date.strftime("%Y-%m-%d")
71
- except Exception as e:
72
- return f"Date parsing error: {str(e)}"
73
-
74
- def extract_numbers(self, text):
75
- """Extract numbers from text"""
76
- numbers = re.findall(r'-?\d+\.?\d*', text)
77
- return [float(n) for n in numbers if n]
78
-
79
- def process_question(self, question):
80
- """Process different types of questions with various strategies"""
81
- question_lower = question.lower()
82
-
83
- # Mathematical questions
84
- if any(word in question_lower for word in ['calculate', 'compute', 'math', '+', '-', '*', '/', 'equals', 'sum', 'product']):
85
- numbers = self.extract_numbers(question)
86
- if len(numbers) >= 2:
87
- if 'sum' in question_lower or '+' in question:
88
- return str(sum(numbers))
89
- elif 'product' in question_lower or '*' in question:
90
- result = 1
91
- for n in numbers:
92
- result *= n
93
- return str(result)
94
- elif 'difference' in question_lower or '-' in question:
95
- return str(numbers[0] - numbers[1] if len(numbers) >= 2 else numbers[0])
96
-
97
- # Try to extract and evaluate mathematical expressions
98
- math_pattern = r'[\d+\-*/().\s]+'
99
- math_expr = re.search(math_pattern, question)
100
- if math_expr:
101
- return self.calculate_math(math_expr.group())
102
-
103
- # Date/time questions
104
- if any(word in question_lower for word in ['date', 'time', 'year', 'month', 'day', 'when', 'ago', 'from now']):
105
- # Try to extract dates
106
- date_patterns = [
107
- r'\d{4}-\d{2}-\d{2}',
108
- r'\d{1,2}/\d{1,2}/\d{4}',
109
- r'\d{1,2}-\d{1,2}-\d{4}'
110
- ]
111
- for pattern in date_patterns:
112
- dates = re.findall(pattern, question)
113
- if dates:
114
- return self.parse_date(dates[0])
115
-
116
- # If asking about current date/time
117
- if 'today' in question_lower or 'now' in question_lower:
118
- return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
119
-
120
- # Questions that might need web search
121
- if any(word in question_lower for word in ['who is', 'what is', 'where is', 'when did', 'how many', 'capital of', 'population of']):
122
- search_result = self.search_web(question)
123
- if search_result and "error" not in search_result.lower():
124
- return search_result
125
-
126
- # Geography questions
127
- if any(word in question_lower for word in ['country', 'city', 'capital', 'continent', 'ocean', 'river']):
128
- search_result = self.search_web(question)
129
- if search_result and "error" not in search_result.lower():
130
- return search_result
131
-
132
- # Science/factual questions
133
- if any(word in question_lower for word in ['element', 'chemical', 'planet', 'temperature', 'speed of light', 'gravity']):
134
- search_result = self.search_web(question)
135
- if search_result and "error" not in search_result.lower():
136
- return search_result
137
-
138
- # General knowledge questions - try web search
139
- search_result = self.search_web(question)
140
- if search_result and "error" not in search_result.lower() and len(search_result) > 20:
141
- return search_result
142
-
143
- # If no specific strategy worked, provide a thoughtful response
144
- return self.general_reasoning(question)
145
-
146
- def general_reasoning(self, question):
147
- """Apply general reasoning for questions that don't fit specific categories"""
148
- question_lower = question.lower()
149
-
150
- # Yes/No questions
151
- if question.endswith('?') and any(word in question_lower for word in ['is', 'are', 'can', 'does', 'do', 'will', 'would']):
152
- # Simple heuristics for common yes/no patterns
153
- if 'impossible' in question_lower or 'cannot' in question_lower:
154
- return "No"
155
- elif 'possible' in question_lower or 'can' in question_lower:
156
- return "Yes"
157
-
158
- # Multiple choice detection
159
- if re.search(r'\b[A-D]\)', question) or 'choose' in question_lower:
160
- # Try to extract the most likely answer based on context
161
- options = re.findall(r'[A-D]\)\s*([^A-D\n]+)', question)
162
- if options:
163
- return options[0].strip() # Return first option as fallback
164
-
165
- # Number-based questions
166
- numbers = self.extract_numbers(question)
167
- if numbers:
168
- if 'how many' in question_lower:
169
- return str(int(max(numbers))) # Return largest number found
170
- elif 'which year' in question_lower or 'what year' in question_lower:
171
- years = [n for n in numbers if 1900 <= n <= 2024]
172
- if years:
173
- return str(int(years[0]))
174
-
175
- # Default fallback - try to give a reasonable answer
176
- if 'what' in question_lower:
177
- return "Information not available"
178
- elif 'how' in question_lower:
179
- return "Process not specified"
180
- elif 'where' in question_lower:
181
- return "Location not determined"
182
- elif 'when' in question_lower:
183
- return "Time not specified"
184
- elif 'who' in question_lower:
185
- return "Person not identified"
186
- else:
187
- return "Unable to determine answer"
188
-
189
- def __call__(self, question: str) -> str:
190
- print(f"GaiaAgent received question (first 100 chars): {question[:100]}...")
191
- try:
192
- answer = self.process_question(question)
193
- print(f"GaiaAgent returning answer: {answer[:100]}...")
194
- return answer
195
- except Exception as e:
196
- print(f"Error in GaiaAgent: {e}")
197
- return f"Error processing question: {str(e)}"
198
 
199
  def run_and_submit_all( profile: gr.OAuthProfile | None):
200
  """
201
- Fetches all questions, runs the GaiaAgent on them, submits all answers,
202
  and displays the results.
203
  """
204
  # --- Determine HF Space Runtime URL and Repo URL ---
@@ -217,7 +33,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
217
 
218
  # 1. Instantiate Agent ( modify this part to create your agent)
219
  try:
220
- agent = GaiaAgent()
 
221
  except Exception as e:
222
  print(f"Error instantiating agent: {e}")
223
  return f"Error initializing agent: {e}", None
@@ -318,33 +135,71 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
318
 
319
 
320
  # --- Build Gradio Interface using Blocks ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  with gr.Blocks() as demo:
322
- gr.Markdown("# GAIA Agent Evaluation Runner")
323
  gr.Markdown(
324
  """
325
  **Instructions:**
326
 
327
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
328
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
329
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
 
330
 
331
  ---
 
 
 
 
 
332
  **Agent Capabilities:**
333
- - Mathematical calculations and computations
334
- - Web search for factual information
335
- - Date and time processing
336
- - General reasoning and pattern recognition
337
- - Multi-step problem solving
338
-
339
- **Disclaimers:**
340
- After clicking the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
341
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the delay after pressing the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
342
  """
343
  )
344
 
345
  gr.LoginButton()
 
 
 
 
 
 
 
 
 
 
 
346
 
347
- run_button = gr.Button("Run Evaluation & Submit All Answers")
348
 
349
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
350
  # Removed max_rows=10 from DataFrame constructor
@@ -376,5 +231,5 @@ if __name__ == "__main__":
376
 
377
  print("-"*(60 + len(" App Starting ")) + "\n")
378
 
379
- print("Launching Gradio Interface for GAIA Agent Evaluation...")
380
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agent import create_agent
 
 
 
 
 
 
 
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
 
11
 
12
  # --- GAIA Agent Definition ---
13
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def run_and_submit_all( profile: gr.OAuthProfile | None):
16
  """
17
+ Fetches all questions, runs the agent returned by create_agent() on them, submits all answers,
18
  and displays the results.
19
  """
20
  # --- Determine HF Space Runtime URL and Repo URL ---
 
33
 
34
  # 1. Instantiate Agent ( modify this part to create your agent)
35
  try:
36
+ agent = create_agent()
37
+ print("GAIA Agent initialized successfully.")
38
  except Exception as e:
39
  print(f"Error instantiating agent: {e}")
40
  return f"Error initializing agent: {e}", None
 
135
 
136
 
137
  # --- Build Gradio Interface using Blocks ---
138
+ def check_api_keys():
139
+ """Check if required API keys are available"""
140
+ openai_key = os.getenv("OPENAI_API_KEY")
141
+ tavily_key = os.getenv("TAVILY_API_KEY")
142
+
143
+ status = []
144
+ if openai_key:
145
+ status.append("✅ OpenAI API Key: Found")
146
+ else:
147
+ status.append("❌ OpenAI API Key: Missing")
148
+
149
+ if tavily_key:
150
+ status.append("✅ Tavily API Key: Found")
151
+ else:
152
+ status.append("❌ Tavily API Key: Missing")
153
+
154
+ return "\n".join(status)
155
+
156
  with gr.Blocks() as demo:
157
+ gr.Markdown("# GAIA Dataset Agent Evaluation Runner")
158
  gr.Markdown(
159
  """
160
  **Instructions:**
161
 
162
+ 1. **Clone this space** to your own account
163
+ 2. **Set up API Keys** in your Space Settings:
164
+ - Go to Settings → Repository secrets
165
+ - Add `OPENAI_API_KEY` with your OpenAI API key
166
+ - Add `TAVILY_API_KEY` with your Tavily API key
167
+ - Restart the space after adding secrets
168
+ 3. **Log in** to your Hugging Face account using the button below
169
+ 4. **Click 'Run Evaluation & Submit All Answers'** to process all questions
170
 
171
  ---
172
+ **⚠️ Important:**
173
+ - You need valid API keys for OpenAI and Tavily for the agent to work
174
+ - Never put API keys directly in your code - always use HF Space secrets
175
+ - Processing all questions may take several minutes
176
+
177
  **Agent Capabilities:**
178
+ - 🧮 Advanced mathematical calculations
179
+ - 🌐 Web search for factual information
180
+ - 📅 Date and time processing
181
+ - 🤔 Multi-step reasoning with LangGraph
182
+
183
+ **Where to get API Keys:**
184
+ - 🔑 **OpenAI**: Get your API key at [platform.openai.com](https://platform.openai.com/api-keys)
185
+ - 🔍 **Tavily**: Sign up and get your API key at [tavily.com](https://tavily.com)
 
186
  """
187
  )
188
 
189
  gr.LoginButton()
190
+
191
+ # API Key status check
192
+ api_status = gr.Textbox(
193
+ label="API Keys Status",
194
+ value=check_api_keys(),
195
+ interactive=False,
196
+ lines=3
197
+ )
198
+
199
+ refresh_status_btn = gr.Button("🔄 Refresh API Status", size="sm")
200
+ refresh_status_btn.click(fn=check_api_keys, outputs=api_status)
201
 
202
+ run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
203
 
204
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
205
  # Removed max_rows=10 from DataFrame constructor
 
231
 
232
  print("-"*(60 + len(" App Starting ")) + "\n")
233
 
234
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
235
  demo.launch(debug=True, share=False)