Seb1101 committed on
Commit
f909b05
·
verified ·
1 Parent(s): 81917a3
Files changed (1) hide show
  1. app.py +195 -11
app.py CHANGED
@@ -3,25 +3,202 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
class BasicAgent:
    """Placeholder agent that replies to every question with one canned sentence."""

    def __init__(self):
        """Announce construction on stdout; the agent keeps no state."""
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Log a preview of *question* and return the canned reply.

        Only the first 50 characters of the question are echoed to stdout.
        """
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        reply = "This is a default answer."
        print(f"Agent returning fixed answer: {reply}")
        return reply
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
@@ -40,7 +217,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -142,7 +319,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
142
 
143
  # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
  **Instructions:**
@@ -152,6 +329,13 @@ with gr.Blocks() as demo:
152
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
 
154
  ---
 
 
 
 
 
 
 
155
  **Disclaimers:**
156
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
  This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
@@ -192,5 +376,5 @@ if __name__ == "__main__":
192
 
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ import re
7
+ import json
8
+ import urllib.parse
9
+ from bs4 import BeautifulSoup
10
+ import numpy as np
11
+ import sympy as sp
12
+ from datetime import datetime, timedelta
13
+ import dateutil.parser
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+ # --- GAIA Agent Definition ---
20
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
21
class GaiaAgent:
    """Rule-based question answerer: dates, arithmetic, web search, heuristics.

    ``agent(question)`` tries the strategies in that order and returns the
    first answer produced. Every public method returns a plain string (or a
    list of floats for ``extract_numbers``) and reports failures as text
    rather than raising, so the agent can be called in a loop without guards.
    """

    # Keyword triggers are matched as whole words. Substring tests would fire
    # on "summary" (sum), "know" (now), "show" (how) or "this" (is).
    _DATE_WORDS_RE = re.compile(
        r'\b(?:dates?|times?|years?|months?|days?|when|ago|from now|today)\b'
    )
    _NOW_WORDS_RE = re.compile(r'\b(?:today|now)\b')
    _MATH_WORDS_RE = re.compile(
        r'\b(?:calculate|compute|math|equals|sum|product|difference)\b'
    )
    _YES_NO_RE = re.compile(r'\b(?:is|are|can|does|do|will|would)\b')

    # At least two numeric operands joined by + - * /, with optional parentheses.
    # A lone hyphen, slash or run of spaces is not an expression. "**" is
    # deliberately not recognised so exponent towers never reach sympy.
    _ARITHMETIC_RE = re.compile(
        r'\(*\s*\d+(?:\.\d+)?\s*\)*'
        r'(?:\s*[-+*/]\s*\(*\s*-?\d+(?:\.\d+)?\s*\)*)+'
    )

    # A leading "-" is a sign only when it does not directly follow a word
    # character or ")", so "1990-2000" yields 1990 and 2000, not -2000.
    _NUMBER_RE = re.compile(r'(?:(?<![\w)])-)?\d+(?:\.\d+)?')

    # Option text runs up to the next "X)" marker, a newline or end of input.
    _OPTION_RE = re.compile(r'\b[A-D]\)\s*(.+?)(?=\s+[A-D]\)|\n|$)')

    _DATE_PATTERNS = (
        re.compile(r'\d{4}-\d{2}-\d{2}'),
        re.compile(r'\d{1,2}/\d{1,2}/\d{4}'),
        re.compile(r'\d{1,2}-\d{1,2}-\d{4}'),
    )

    # Phrases for which even a very short search result is accepted.
    _LOOKUP_PHRASES = (
        'who is', 'what is', 'where is', 'when did', 'how many',
        'capital of', 'population of',
        'country', 'city', 'capital', 'continent', 'ocean', 'river',
        'element', 'chemical', 'planet', 'temperature', 'speed of light',
        'gravity',
    )

    # Prefixes of the failure strings that search_web() returns.
    _SEARCH_FAILURE_PREFIXES = ("Unable to search for:", "Search error:")

    # Last-resort replies, checked in this order.
    _FALLBACK_ANSWERS = (
        ('what', "Information not available"),
        ('how', "Process not specified"),
        ('where', "Location not determined"),
        ('when', "Time not specified"),
        ('who', "Person not identified"),
    )

    def __init__(self):
        """Create the HTTP session shared by all web lookups."""
        print("GaiaAgent initialized.")
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

    @staticmethod
    def _format_number(value):
        """Render a number without a redundant fractional part (7.0 -> "7")."""
        number = float(value)
        if number.is_integer():
            return str(int(number))
        return str(number)

    def _instant_answer(self, encoded_query):
        """Return DuckDuckGo's instant answer for the query, or None."""
        url = (
            f"https://api.duckduckgo.com/?q={encoded_query}"
            "&format=json&no_html=1&skip_disambig=1"
        )
        response = self.session.get(url, timeout=10)
        if response.status_code != 200:
            return None
        try:
            data = response.json()
        except ValueError:
            # Non-JSON body: let the caller fall back to HTML scraping
            # instead of aborting the whole search.
            return None
        answer = data.get('AbstractText') or data.get('Answer')
        return str(answer) if answer else None

    def _scraped_snippets(self, encoded_query, max_results):
        """Return the joined text of the first result snippets, or None."""
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        response = self.session.get(url, timeout=10)
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        snippets = soup.find_all('a', class_='result__snippet', limit=max_results)
        text = " ".join(s.get_text().strip() for s in snippets)
        return text or None

    def search_web(self, query, max_results=3):
        """Look up *query* on DuckDuckGo and return the text found.

        The instant-answer API is tried first, then up to *max_results*
        snippets from the HTML results page. On failure the return value is
        ``"Unable to search for: <query>"`` or ``"Search error: <reason>"``.
        """
        try:
            encoded = urllib.parse.quote(query)
            found = (
                self._instant_answer(encoded)
                or self._scraped_snippets(encoded, max_results)
            )
        except Exception as e:
            # Deliberately broad: lookups are best-effort and must not raise.
            return f"Search error: {str(e)}"
        return found or f"Unable to search for: {query}"

    def _is_search_failure(self, result):
        """Tell whether *result* is one of search_web()'s failure strings."""
        return not result or result.startswith(self._SEARCH_FAILURE_PREFIXES)

    def calculate_math(self, expression):
        """Evaluate an arithmetic expression and return the result as text.

        Everything except digits, ``+ - * / ( ) .`` and whitespace is removed
        before evaluation. Returns ``"Math error: <reason>"`` on failure.
        """
        try:
            cleaned = re.sub(r'[^0-9+\-*/().\s]', '', expression).strip()
            if not cleaned:
                return "Math error: empty expression"
            # NOTE(security): sympify() evaluates its input with eval(). The
            # character whitelist above is what makes this call acceptable;
            # do not loosen it without replacing sympify with a real parser.
            value = sp.sympify(cleaned)
            if getattr(value, 'is_Integer', False):
                return str(value)  # exact, no float round-trip for big ints
            return self._format_number(value)
        except Exception as e:
            # sympify can raise several unrelated exception types.
            return f"Math error: {str(e)}"

    def parse_date(self, date_string):
        """Parse *date_string* and return it as ``YYYY-MM-DD``.

        Returns ``"Date parsing error: <reason>"`` when parsing fails.
        """
        try:
            parsed_date = dateutil.parser.parse(date_string)
        except (ValueError, OverflowError, TypeError) as e:
            return f"Date parsing error: {str(e)}"
        return parsed_date.strftime("%Y-%m-%d")

    def extract_numbers(self, text):
        """Return every number found in *text* as a list of floats.

        A hyphen counts as a minus sign only when it does not directly follow
        a word character, so ranges and dates are not read as negatives.
        """
        return [float(token) for token in self._NUMBER_RE.findall(text)]

    def _answer_date(self, question, question_lower):
        """Answer date questions, or return None when this does not apply."""
        if not self._DATE_WORDS_RE.search(question_lower):
            return None
        for pattern in self._DATE_PATTERNS:
            found = pattern.search(question)
            if found:
                parsed = self.parse_date(found.group())
                # A parse failure is not an answer; try the next format.
                if not parsed.startswith("Date parsing error"):
                    return parsed
        if self._NOW_WORDS_RE.search(question_lower):
            return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return None

    def _answer_math(self, question, question_lower):
        """Answer arithmetic questions, or return None when not applicable."""
        # An explicit expression wins: evaluating "5-3" is more reliable than
        # guessing the operation from numbers scattered in the sentence.
        expression = self._ARITHMETIC_RE.search(question)
        if expression:
            result = self.calculate_math(expression.group())
            if not result.startswith("Math error"):
                return result

        if not self._MATH_WORDS_RE.search(question_lower):
            return None
        numbers = self.extract_numbers(question)
        if len(numbers) < 2:
            return None
        if re.search(r'\bsum\b', question_lower):
            return self._format_number(sum(numbers))
        if re.search(r'\bproduct\b', question_lower):
            total = 1
            for n in numbers:
                total *= n
            return self._format_number(total)
        if re.search(r'\bdifference\b', question_lower):
            return self._format_number(numbers[0] - numbers[1])
        return None

    def _answer_search(self, question, question_lower):
        """Answer via one web search, or return None if nothing usable."""
        result = self.search_web(question)
        if not isinstance(result, str) or self._is_search_failure(result):
            return None
        targeted = any(phrase in question_lower for phrase in self._LOOKUP_PHRASES)
        # Without a lookup phrase, a very short result is likely noise.
        if targeted or len(result) > 20:
            return result
        return None

    def process_question(self, question):
        """Answer *question* with the first strategy that yields a result.

        Order: dates, arithmetic, a single web search, then
        ``general_reasoning``. Dates are checked before arithmetic so that
        "2024-01-15" is not evaluated as a subtraction. Helper error strings
        ("Math error", "Search error", ...) are never returned as the answer.
        """
        question_lower = question.lower()

        answer = self._answer_date(question, question_lower)
        if answer is None:
            answer = self._answer_math(question, question_lower)
        if answer is None:
            answer = self._answer_search(question, question_lower)
        if answer is None:
            answer = self.general_reasoning(question)
        return answer

    def general_reasoning(self, question):
        """Last-resort heuristics for questions no other strategy answered.

        Handles yes/no questions, lettered multiple choice (first option),
        "how many" / "what year" questions over numbers in the text, and
        finally a canned reply chosen by the question word.
        """
        question_lower = question.lower()

        # Yes/No questions
        if question.endswith('?') and self._YES_NO_RE.search(question_lower):
            if 'impossible' in question_lower or 'cannot' in question_lower:
                return "No"
            if 'possible' in question_lower or re.search(r'\bcan\b', question_lower):
                return "Yes"

        # Multiple choice detection
        if re.search(r'\b[A-D]\)', question) or 'choose' in question_lower:
            options = self._OPTION_RE.findall(question)
            if options:
                return options[0].strip()  # first option as fallback

        # Number-based questions
        numbers = self.extract_numbers(question)
        if numbers:
            if 'how many' in question_lower:
                return str(int(max(numbers)))  # largest number found
            if 'which year' in question_lower or 'what year' in question_lower:
                current_year = datetime.now().year
                years = [n for n in numbers if 1900 <= n <= current_year]
                if years:
                    return str(int(years[0]))

        # Default fallback keyed on the question word
        for word, reply in self._FALLBACK_ANSWERS:
            if re.search(rf'\b{word}\b', question_lower):
                return reply
        return "Unable to determine answer"

    def __call__(self, question: str) -> str:
        """Answer *question*; never raises, errors come back as text."""
        print(f"GaiaAgent received question (first 100 chars): {question[:100]}...")
        try:
            answer = self.process_question(question)
            print(f"GaiaAgent returning answer: {answer[:100]}...")
            return answer
        except Exception as e:
            print(f"Error in GaiaAgent: {e}")
            return f"Error processing question: {str(e)}"
198
 
199
  def run_and_submit_all( profile: gr.OAuthProfile | None):
200
  """
201
+ Fetches all questions, runs the GaiaAgent on them, submits all answers,
202
  and displays the results.
203
  """
204
  # --- Determine HF Space Runtime URL and Repo URL ---
 
217
 
218
  # 1. Instantiate Agent ( modify this part to create your agent)
219
  try:
220
+ agent = GaiaAgent()
221
  except Exception as e:
222
  print(f"Error instantiating agent: {e}")
223
  return f"Error initializing agent: {e}", None
 
319
 
320
  # --- Build Gradio Interface using Blocks ---
321
  with gr.Blocks() as demo:
322
+ gr.Markdown("# GAIA Agent Evaluation Runner")
323
  gr.Markdown(
324
  """
325
  **Instructions:**
 
329
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
330
 
331
  ---
332
+ **Agent Capabilities:**
333
+ - Mathematical calculations and computations
334
+ - Web search for factual information
335
+ - Date and time processing
336
+ - General reasoning and pattern recognition
337
+ - Multi-step problem solving
338
+
339
  **Disclaimers:**
340
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
341
  This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
 
376
 
377
  print("-"*(60 + len(" App Starting ")) + "\n")
378
 
379
+ print("Launching Gradio Interface for GAIA Agent Evaluation...")
380
  demo.launch(debug=True, share=False)