Renzo committed on
Commit
6c75f10
·
1 Parent(s): d68986e

Update .gitignore, refactor app.py for improved agent functionality, and enhance README

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. README.md +7 -1
  3. app.py +81 -137
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  .venv
2
  .idea
3
- .env
 
 
1
  .venv
2
  .idea
3
+ .env
4
+ questions.txt
README.md CHANGED
@@ -12,4 +12,10 @@ hf_oauth: true
12
  hf_oauth_expiration_minutes: 480
13
  ---
14
 
15
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
12
  hf_oauth_expiration_minutes: 480
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
16
+
17
+ # Validate answer here
18
+
19
+ ```
20
+ https://huggingface.co/datasets/gaia-benchmark/GAIA/blob/main/2023/validation/metadata.jsonl
21
+ ```
app.py CHANGED
@@ -1,19 +1,15 @@
1
  import os
2
  import asyncio
 
3
  import gradio as gr
4
  import requests
5
- import inspect
6
  import pandas as pd
7
- from agent import agent
8
  from agno.agent import RunResponse
 
9
 
10
- # (Keep Constants as is)
11
- # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
 
15
- # --- Basic Agent Definition ---
16
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
17
  async def _async_answer(answer_text: str) -> str:
18
  response: RunResponse = await agent.arun(answer_text)
19
  return response.content
@@ -21,188 +17,136 @@ async def _async_answer(answer_text: str) -> str:
21
 
22
  class BasicAgent:
23
  def __init__(self):
24
- print("BasicAgent initialized.")
25
 
26
  def __call__(self, question: str) -> str:
27
- print(f"Agent received question (first 50 chars): {question[:50]}...")
28
- fixed_answer = "This is a default answer."
29
- answer = asyncio.run(_async_answer(question))
30
- print(f"Agent returning fixed answer: {answer}")
31
- return answer
32
 
33
 
34
- def run_and_submit_all(profile: gr.OAuthProfile | None):
35
- """
36
- Fetches all questions, runs the BasicAgent on them, submits all answers,
37
- and displays the results.
38
- """
39
- # --- Determine HF Space Runtime URL and Repo URL ---
40
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
41
-
42
  if profile:
43
  username = f"{profile.username}"
44
- print(f"User logged in: {username}")
45
  else:
46
- print("User not logged in.")
47
- return "Please Login to Hugging Face with the button.", None
48
 
49
  api_url = DEFAULT_API_URL
50
  questions_url = f"{api_url}/questions"
51
  submit_url = f"{api_url}/submit"
52
 
53
- # 1. Instantiate Agent ( modify this part to create your agent)
54
  try:
55
- agent = BasicAgent()
56
  except Exception as e:
57
- print(f"Error instantiating agent: {e}")
58
  return f"Error initializing agent: {e}", None
59
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
60
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
61
- print(agent_code)
62
 
63
- # 2. Fetch Questions
64
- print(f"Fetching questions from: {questions_url}")
65
  try:
66
  response = requests.get(questions_url, timeout=15)
67
  response.raise_for_status()
68
  questions_data = response.json()
69
- if not questions_data:
70
- print("Fetched questions list is empty.")
71
- return "Fetched questions list is empty or invalid format.", None
72
- print(f"Fetched {len(questions_data)} questions.")
73
- except requests.exceptions.RequestException as e:
74
- print(f"Error fetching questions: {e}")
75
- return f"Error fetching questions: {e}", None
76
- except requests.exceptions.JSONDecodeError as e:
77
- print(f"Error decoding JSON response from questions endpoint: {e}")
78
- print(f"Response text: {response.text[:500]}")
79
- return f"Error decoding server response for questions: {e}", None
80
  except Exception as e:
81
- print(f"An unexpected error occurred fetching questions: {e}")
82
- return f"An unexpected error occurred fetching questions: {e}", None
 
 
 
 
83
 
84
- # 3. Run your Agent
85
  results_log = []
86
  answers_payload = []
87
- print(f"Running agent on {len(questions_data)} questions...")
88
  for item in questions_data:
89
- task_id = item.get("task_id")
90
- question_text = item.get("question")
91
- if not task_id or question_text is None:
92
- print(f"Skipping item with missing task_id or question: {item}")
93
  continue
94
  try:
95
- submitted_answer = agent(question_text)
96
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
97
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
98
  except Exception as e:
99
- print(f"Error running agent on task {task_id}: {e}")
100
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
101
 
102
  if not answers_payload:
103
- print("Agent did not produce any answers to submit.")
104
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
 
105
 
106
- # 4. Prepare Submission
107
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
108
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
109
- print(status_update)
 
110
 
111
- # 5. Submit
112
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
113
  try:
114
  response = requests.post(submit_url, json=submission_data, timeout=60)
115
  response.raise_for_status()
116
  result_data = response.json()
117
  final_status = (
118
- f"Submission Successful!\n"
119
  f"User: {result_data.get('username')}\n"
120
- f"Overall Score: {result_data.get('score', 'N/A')}% "
121
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
122
- f"Message: {result_data.get('message', 'No message received.')}"
123
  )
124
- print("Submission successful.")
125
- results_df = pd.DataFrame(results_log)
126
- return final_status, results_df
127
- except requests.exceptions.HTTPError as e:
128
- error_detail = f"Server responded with status {e.response.status_code}."
129
- try:
130
- error_json = e.response.json()
131
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
132
- except requests.exceptions.JSONDecodeError:
133
- error_detail += f" Response: {e.response.text[:500]}"
134
- status_message = f"Submission Failed: {error_detail}"
135
- print(status_message)
136
- results_df = pd.DataFrame(results_log)
137
- return status_message, results_df
138
- except requests.exceptions.Timeout:
139
- status_message = "Submission Failed: The request timed out."
140
- print(status_message)
141
- results_df = pd.DataFrame(results_log)
142
- return status_message, results_df
143
- except requests.exceptions.RequestException as e:
144
- status_message = f"Submission Failed: Network error - {e}"
145
- print(status_message)
146
- results_df = pd.DataFrame(results_log)
147
- return status_message, results_df
148
  except Exception as e:
149
- status_message = f"An unexpected error occurred during submission: {e}"
150
- print(status_message)
151
- results_df = pd.DataFrame(results_log)
152
- return status_message, results_df
 
 
 
 
 
153
 
154
 
155
- # --- Build Gradio Interface using Blocks ---
156
  with gr.Blocks() as demo:
157
  gr.Markdown("# Basic Agent Evaluation Runner")
158
- gr.Markdown(
159
- """
160
- **Instructions:**
161
-
162
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
163
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
164
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
165
-
166
- ---
167
- **Disclaimers:**
168
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
169
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
170
- """
171
- )
172
 
173
  gr.LoginButton()
174
 
175
- run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
176
 
177
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
178
- # Removed max_rows=10 from DataFrame constructor
179
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
180
 
181
- run_button.click(
182
- fn=run_and_submit_all,
183
- outputs=[status_output, results_table]
 
184
  )
185
 
186
- if __name__ == "__main__":
187
- print("\n" + "-" * 30 + " App Starting " + "-" * 30)
188
- # Check for SPACE_HOST and SPACE_ID at startup for information
189
- space_host_startup = os.getenv("SPACE_HOST")
190
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
191
-
192
- if space_host_startup:
193
- print(f"✅ SPACE_HOST found: {space_host_startup}")
194
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
195
- else:
196
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
197
-
198
- if space_id_startup: # Print repo URLs if SPACE_ID is found
199
- print(f"✅ SPACE_ID found: {space_id_startup}")
200
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
201
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
202
- else:
203
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
204
 
205
- print("-" * (60 + len(" App Starting ")) + "\n")
 
 
206
 
207
- print("Launching Gradio Interface for Basic Agent Evaluation...")
208
- demo.launch(debug=True, share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import asyncio
3
+ import argparse
4
  import gradio as gr
5
  import requests
 
6
  import pandas as pd
 
7
  from agno.agent import RunResponse
8
+ from agent import agent
9
 
 
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
 
 
 
async def _async_answer(answer_text: str) -> str:
    """Run the shared ``agent`` on one question and return its answer as text.

    Args:
        answer_text: The question text forwarded unchanged to ``agent.arun``.

    Returns:
        The ``content`` of the agent's response as a string. A missing
        (``None``) content becomes an empty string and any other non-string
        content is converted with ``str()``.
    """
    response: RunResponse = await agent.arun(answer_text)
    content = response.content
    # The declared return type is ``str`` and the caller places the value in a
    # JSON payload under "submitted_answer", so normalise whatever the agent
    # produced instead of leaking ``None`` or an arbitrary object downstream.
    if content is None:
        return ""
    return content if isinstance(content, str) else str(content)
 
17
 
class BasicAgent:
    """Synchronous callable wrapper around the async ``_async_answer`` helper."""

    def __call__(self, question: str) -> str:
        """Answer ``question`` by running the async helper to completion.

        ``asyncio.run`` starts a fresh event loop for every call, so this must
        be invoked from code that is not already inside a running loop.
        """
        pending_answer = _async_answer(question)
        return asyncio.run(pending_answer)
 
 
 
 
24
 
25
 
def run_agent(
    profile: gr.OAuthProfile | None,
    task_id: str | None = None,
    submit: bool = True,
) -> tuple[str, pd.DataFrame | None]:
    """Fetch the questions, answer them with ``BasicAgent`` and optionally submit.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``.
            It is only required when ``submit`` is true, because the username
            is used nowhere except in the submission payload.
        task_id: When given (and not blank), only the question whose
            ``task_id`` matches is run.
        submit: When false, answers are computed and returned but never posted
            to the scoring endpoint.

    Returns:
        A ``(status, results)`` tuple: a human-readable status string and a
        ``pandas.DataFrame`` with one row per attempted task, or ``None`` when
        the run stopped before any question was attempted.
    """
    # A dry run never sends the username anywhere, so it must not demand a
    # login. Requiring one unconditionally made the ``--task-id`` command-line
    # path (which calls this with ``profile=None``) return the login message
    # without ever running the agent.
    if submit and not profile:
        return "Please log in to Hugging Face.", None
    username = f"{profile.username}" if profile else ""

    space_id = os.getenv("SPACE_ID")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent_instance = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # IDs typed or pasted into the UI textbox often carry stray whitespace;
    # a blank value means "no filter".
    task_id = str(task_id).strip() if task_id else None
    if task_id:
        questions_data = [
            q for q in questions_data if str(q.get("task_id")) == task_id
        ]
        if not questions_data:
            return f"Task {task_id} not found.", None

    results_log = []
    answers_payload = []
    for item in questions_data:
        tid = item.get("task_id")
        qtext = item.get("question")
        if not tid or qtext is None:
            continue
        try:
            submitted_answer = agent_instance(qtext)
        except Exception as e:
            # One failing task must not abort the run: record the error in
            # the results table and leave the task out of the payload.
            results_log.append(
                {"Task ID": tid, "Question": qtext, "Submitted Answer": f"AGENT ERROR: {e}"}
            )
            continue
        answers_payload.append({"task_id": tid, "submitted_answer": submitted_answer})
        results_log.append(
            {"Task ID": tid, "Question": qtext, "Submitted Answer": submitted_answer}
        )

    if not answers_payload:
        return "No answers produced.", pd.DataFrame(results_log)

    if not submit:
        return "Test mode: nothing submitted.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        # Link to the Space's code, built from the SPACE_ID environment variable.
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})\n"
            f"Message: {result_data.get('message', '')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
96
+
97
+
def run_agent_single(profile: gr.OAuthProfile | None, task_id: str):
    """Handler for the test button: run via ``run_agent`` with ``submit=False``.

    An empty ``task_id`` (e.g. a blank textbox) is passed on as ``None``.
    """
    selected_task = task_id if task_id else None
    return run_agent(profile, selected_task, submit=False)
100
+
101
+
def run_agent_all(profile: gr.OAuthProfile | None, task_id: str):
    """Handler for the submit button: run via ``run_agent`` with ``submit=True``.

    An empty ``task_id`` (e.g. a blank textbox) is passed on as ``None``; a
    non-empty one is forwarded unchanged, so it still narrows the run.
    """
    selected_task = task_id if task_id else None
    return run_agent(profile, selected_task, submit=True)
104
 
105
 
 
# Gradio UI: login button, optional task filter, two action buttons and the
# status/results outputs they both write to.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")

    gr.LoginButton()

    task_id_input = gr.Textbox(
        label="Task ID (optional)",
        placeholder="e.g. 2023060607",
    )
    run_test_button = gr.Button("Test Single Task (no submit)")
    run_all_button = gr.Button("Run & Submit All")

    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)

    # Both buttons read the same textbox and write to the same outputs; only
    # the handler differs.
    for _button, _handler in (
        (run_test_button, run_agent_single),
        (run_all_button, run_agent_all),
    ):
        _button.click(
            fn=_handler,
            inputs=[task_id_input],
            outputs=[status_output, results_table],
        )

    gr.Markdown(
        "Running all tasks may take time. Use the single‑task button to debug quickly."
    )
133
 
if __name__ == "__main__":
    # Echo the Space-related environment variables that are set.
    for env_name in ("SPACE_HOST", "SPACE_ID"):
        env_value = os.getenv(env_name)
        if env_value:
            print(f"{env_name}: {env_value}")

    cli = argparse.ArgumentParser()
    cli.add_argument("--task-id", help="Run a single task locally without submission")
    # parse_known_args: unrecognised arguments are ignored rather than rejected.
    args, _ = cli.parse_known_args()

    if not args.task_id:
        # No task requested on the command line: start the web UI.
        demo.launch(debug=True, share=False)
    else:
        status, table = run_agent(profile=None, task_id=args.task_id, submit=False)
        print(status)
        if table is not None:
            print(table)