ashishja committed on
Commit
7d1b068
·
verified ·
1 Parent(s): b76db19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -118
app.py CHANGED
@@ -1,195 +1,210 @@
1
- from zoneinfo import ZoneInfo
2
- from google.adk.agents import Agent,BaseAgent,LlmAgent
3
- from google.adk.tools import google_search
4
- from google.adk.runners import Runner
5
- from google.adk.sessions import InMemorySessionService
6
- from google.genai import types
7
  import google.genai.types as types
8
  import requests
9
- from google.adk.events import Event, EventActions
10
  from google.adk.agents.invocation_context import InvocationContext
11
- from typing import AsyncGenerator, Dict, Any
12
- from google.genai import types as genai_types
13
- from google.adk.tools import ToolContext, FunctionTool
14
- import logging
15
- #from google.adk.tools import built_in_code_execution
16
- from google.adk.tools import agent_tool
17
- import asyncio
 
 
 
18
 
 
19
  logging.basicConfig(level=logging.ERROR)
20
 
21
- url = 'https://agents-course-unit4-scoring.hf.space/questions'
22
- headers = {'accept': 'application/json'}
23
- response = requests.get(url, headers=headers)
24
 
25
- def answer_questions():
26
- """Fetches questions from the scoring API."""
 
 
 
 
27
  url = 'https://agents-course-unit4-scoring.hf.space/questions'
28
  headers = {'accept': 'application/json'}
29
- response = requests.get(url, headers=headers)
30
- response.raise_for_status() # Raise an exception for bad status codes
31
- prompts = []
32
- for i in response.json():
33
- task_id = i['task_id']
34
- if i['file_name']:
35
- url_file = f"https://agents-course-unit4-scoring.hf.space/files/{i['task_id']}"
36
- question = i['question']
37
- prompt = f"{task_id}:{question} and the file is {url_file}, give the final answer only"
38
- else:
39
- question = i['question']
40
- prompt = f"{task_id}:{question} give the final answer only"
41
- prompts.append(prompt)
42
- return prompts
 
 
 
 
 
43
 
44
  def submit_questions(answers: list[Dict[str, Any]]) -> Dict[str, Any]:
45
- """Submits the collected answers to the scoring API."""
 
 
 
 
 
 
 
 
 
 
 
 
46
  url = 'https://agents-course-unit4-scoring.hf.space/submit'
47
  payload = {
48
- "username": "ashishja",
49
- "agent_code": "https://huggingface.co/spaces/ashishja/Agents_Course_Final_Assignment_Ashish",
50
  "answers": answers
51
  }
52
  headers = {'accept': 'application/json', "Content-Type": "application/json"}
53
- response = requests.post(url, headers=headers, json=payload)
54
- import json
55
- print("Submitting the following payload:")
56
- print(json.dumps(payload, indent=2))
57
- if response.status_code == 200:
58
  print("Submission successful!")
 
59
  return response.json()
60
- else:
61
- print(f"Submission failed with status {response.status_code}: {response.text}")
62
- response.raise_for_status()
 
63
 
64
- responses_api = FunctionTool(func=answer_questions)
65
- submit_api = FunctionTool(func=submit_questions)
 
66
 
67
- # class QuestionAnswerer(LlmAgent):
68
- # async def _run_async_impl(self, ctx: InvocationContext) -> AsyncGenerator[Event, None]:
69
- # questions_to_answer = ctx.session_service.get('fetched_questions', [])
70
- # for q in questions_to_answer:
71
- # answer = await self._llm(messages=[types.ChatMessage(role="user", parts=[types.Part(text=q)])])
72
- # yield Event(author=self.name, content=answer.content)
73
- #
74
- # qa = QuestionAnswerer(name = 'qa_1', model="gemini-1.5-flash-latest", description="Question Answerer")
75
 
76
- APP_NAME="final_assignment_agent"
77
- USER_ID="user1234"
78
- SESSION_ID="5678"
79
 
 
80
  code_agent = LlmAgent(
81
- name='codegaiaAgent',
82
- model="gemini-2.0-flash",
83
- description=(
84
- "You are a smart agent that can write and execute code to answer questions. Use this for questions involving code files (.py) or data files (.csv, .xlsx, .json, .txt)."
85
- ),
86
  instruction=(
87
- "If the question contains a file with .py, get the code file and, depending on the question and the file provided, execute the code and provide the final answer. "
88
- "If the question contains a spreadsheet file like .xlsx or .csv, get the file, use pandas to analyze it, and provide the final answer. "
89
- "If the question contains a file with .txt or .json, get the file and use code to parse it and answer the question. "
90
- "Always use the code execution tool to run your code and provide only the final answer."
91
  ),
92
- #tools=[built_in_code_execution],
93
  )
94
 
 
95
  search_agent = LlmAgent(
96
- name='searchgaiaAgent',
97
- model="gemini-2.0-flash",
98
- description=(
99
- "You are a smart agent that can search the web to answer questions."
100
- ),
101
  instruction=(
102
- "Get the URL associated with the question, perform a web search, consolidate the information, and answer the provided question."
 
 
 
103
  ),
104
  tools=[google_search],
105
  )
106
 
 
107
  image_agent = LlmAgent(
108
- name='imagegaiaAgent',
109
- model="gemini-2.0-flash",
110
- description=(
111
- "You are a smart agent that can analyze an image file and answer any questions related to it."
112
- ),
113
  instruction=(
114
- "Get the image file from the link provided in the prompt. Use your multimodal capabilities to understand the image and answer the question."
 
 
115
  ),
116
  )
117
 
 
118
  youtube_agent = LlmAgent(
119
- name='youtubegaiaAgent',
120
- model="gemini-2.0-flash",
121
- description=(
122
- "You are a smart agent that can watch a YouTube video and answer any questions related to it."
123
- ),
124
  instruction=(
125
- "Get the YouTube link from the prompt. Use your multimodal capabilities to watch the video and answer the provided question."
 
 
126
  ),
127
  )
128
 
 
129
  root_agent = LlmAgent(
130
- name='basegaiaAgent',
131
- model="gemini-2.0-flash",
132
- description=(
133
- "You are a master agent that orchestrates sub-agents to answer various types of questions."
134
- ),
135
  instruction=(
136
- "You are a helpful orchestrator agent. Your primary goal is to answer a series of questions and submit them. "
137
- "First, invoke your tool 'answer_questions' to retrieve the list of questions. "
138
- "Once you receive the list, iterate through each question. For each one, delegate to the most appropriate sub-agent (code, search, youtube, image) based on its description and the question's content (e.g., file type). "
139
- "After getting the answer from the sub-agent, format it into a dictionary with 'task_id' and 'submitted_answer' keys. The task_id is at the beginning of each question string, separated by a colon. "
140
- "Collect all these answer dictionaries into a single list. "
141
- "Finally, pass this complete list of dictionaries to the 'submit_questions' tool to submit all answers at once."
 
142
  ),
143
  tools=[
144
  responses_api,
145
  submit_api,
146
  agent_tool.AgentTool(agent=code_agent),
147
  agent_tool.AgentTool(agent=search_agent),
 
148
  agent_tool.AgentTool(agent=youtube_agent),
149
- agent_tool.AgentTool(agent=image_agent)
150
  ],
151
  )
152
 
 
 
153
  session_service = InMemorySessionService()
154
  runner = Runner(agent=root_agent, app_name=APP_NAME, session_service=session_service)
155
 
156
- async def process_questions_and_answer():
157
- """
158
- Orchestrates the entire process of fetching questions, answering them
159
- using the agent, and submitting the final answers.
160
- """
161
  session = await session_service.create_session(
162
  app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID
163
  )
164
- print(f"===== Application Startup at {session.create_time} =====")
165
- print(f"Session created: {session.session_id}")
166
-
167
- # Initial prompt to kick off the agent's task
168
- initial_prompt = "Please get all the questions, answer each one by delegating to the correct tool or sub-agent, format the answers, and then submit the final list."
169
-
170
- print("\nSending initial prompt to the agent...")
171
- print(f"Prompt: '{initial_prompt}'")
172
-
173
- # Run the agent and stream events
174
  async for event in runner.run_async(
175
  session_id=session.session_id,
176
  content=types.Content(role="user", parts=[types.Part(text=initial_prompt)]),
177
  ):
 
178
  if event.action == EventActions.AGENT_RESPONSE and event.author == root_agent.name:
179
  if event.content and event.content.parts:
180
- print(f"\nFinal Agent Response: {event.content.parts[0].text}")
181
  elif event.action == EventActions.TOOL_OUTPUT:
182
  if event.content and event.content.parts and event.content.parts[0].tool_output:
183
  tool_output = event.content.parts[0].tool_output
184
- print(f"\n<-- Tool Output from {tool_output.tool_name}:")
185
- for key, value in tool_output.data.items():
186
- print(f" {key}: {value}")
187
 
188
- print("\n===== Task Complete =====")
189
 
190
  async def main():
191
  """Main entry point for the application."""
192
- await process_questions_and_answer()
193
 
194
  if __name__ == "__main__":
195
  asyncio.run(main())
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ from typing import AsyncGenerator, Dict, Any
5
+
 
6
  import google.genai.types as types
7
  import requests
8
+ from google.adk.agents import BaseAgent, LlmAgent
9
  from google.adk.agents.invocation_context import InvocationContext
10
+ from google.adk.events import Event, EventActions
11
+ from google.adk.runners import Runner
12
+ from google.adk.sessions import InMemorySessionService
13
+ from google.adk.tools import (
14
+ FunctionTool,
15
+ ToolContext,
16
+ agent_tool,
17
+ # built_in_code_execution,
18
+ google_search,
19
+ )
20
 
21
# Configure logging to suppress verbose output: only ERROR and above are emitted.
logging.basicConfig(level=logging.ERROR)
23
 
24
+ # --- API Interaction Functions ---
 
 
25
 
26
def answer_questions() -> list[str]:
    """Fetch the evaluation questions from the scoring API as prompt strings.

    Each prompt has the form ``"<task_id>:<question>"``. When the question
    record carries a non-empty ``file_name``, the URL of the associated file
    is appended to the prompt.

    Returns:
        The list of prompts. An empty list is returned when the request
        fails or the response body is not valid JSON; individual records
        missing ``task_id`` or ``question`` are skipped.
    """
    print("Attempting to fetch questions from the API...")
    url = 'https://agents-course-unit4-scoring.hf.space/questions'
    headers = {'accept': 'application/json'}
    try:
        # Without a timeout a stalled server would block the agent forever.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        questions_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on older requests releases.
        print(f"Error fetching questions: {e}")
        return []

    print(f"Successfully fetched {len(questions_data)} questions.")
    prompts = []
    for item in questions_data:
        try:
            task_id = item['task_id']
            question_text = item['question']
        except (KeyError, TypeError) as e:
            # Keep the best-effort contract: one bad record must not abort the run.
            print(f"Skipping malformed question record: {e!r}")
            continue
        if item.get('file_name'):
            file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
            prompt = f"{task_id}:{question_text} The URL for the associated file is: {file_url}"
        else:
            prompt = f"{task_id}:{question_text}"
        prompts.append(prompt)
    return prompts
53
 
54
def submit_questions(answers: list[Dict[str, Any]]) -> Dict[str, Any]:
    """Submit the collected answers to the scoring API.

    Args:
        answers: A list of dictionaries, where each dictionary contains
            a 'task_id' and a 'submitted_answer'.

    Returns:
        The decoded JSON body of the scoring API's response.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an error status. The error is
            printed first and then re-raised.
    """
    # !!! IMPORTANT !!!
    # REPLACE the username and agent_code with your own details.
    username = "YOUR_HUGGING_FACE_USERNAME"
    agent_code_url = "https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME/tree/main"

    print(f"Attempting to submit {len(answers)} answers for user '{username}'...")
    url = 'https://agents-course-unit4-scoring.hf.space/submit'
    payload = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers
    }
    headers = {'accept': 'application/json', "Content-Type": "application/json"}

    try:
        # Without a timeout a stalled server would block the agent forever.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error submitting answers: {e}")
        # A Response object is falsy for 4xx/5xx statuses, so a plain truth
        # test would hide the body of exactly the responses we want to see.
        body = e.response.text if e.response is not None else 'No response'
        print(f"Response Body: {body}")
        raise

    print("Submission successful!")
    print("Response:", result)
    return result
86
 
87
# Wrap API functions in ADK Tools.
# FunctionTool is constructed from the callable alone; the name and the
# description presented to the model are taken from the function's __name__
# and docstring, so no separate description argument is passed here.
responses_api = FunctionTool(func=answer_questions)
submit_api = FunctionTool(func=submit_questions)
90
 
91
+ # --- Agent Definitions ---
 
 
 
 
 
 
 
92
 
93
# Identifiers passed to the Runner (APP_NAME) and to create_session
# (APP_NAME, USER_ID, SESSION_ID) further down in this module.
APP_NAME = "gaia_challenge_agent"
USER_ID = "test_user"
SESSION_ID = "main_session"
96
 
97
# A specialized agent for tasks requiring code execution or data analysis.
# It is exposed to the orchestrator as an AgentTool.
# NOTE(review): the code-execution tool below is commented out, so this agent
# is constructed without any tools even though its instruction asks it to
# write and run Python — confirm this is intended.
code_agent = LlmAgent(
    name='CodeAgent',
    model="gemini-1.5-pro-latest",  # Using Pro for complex code generation
    description="Executes code and analyzes data files (.csv, .xlsx, .json, .py) to answer a question. Responds with only the final, exact answer.",
    instruction=(
        "You are an expert in data analysis and code execution. Given a question and a file URL, "
        "write Python code to find the answer. "
        "Use pandas for data files. Fetch remote files using requests. "
        "Your final output must be only the answer to the question, with no extra text or explanation."
    ),
    # tools=[built_in_code_execution],
)
110
 
111
# A specialized agent for web searches. It is the only sub-agent here that is
# given a tool (google_search); the orchestrator reaches it via an AgentTool.
search_agent = LlmAgent(
    name='SearchAgent',
    model="gemini-1.5-flash-latest",  # Flash is efficient for search-and-answer
    description="Searches the web to answer questions about current events, facts, or general knowledge. Responds with only the final, exact answer.",
    instruction=(
        "You are an expert web researcher. You will be given a question. "
        "Use your search tool to find the most accurate information. "
        "Synthesize the findings and provide a concise, direct answer to the question. "
        "Your final output must be only the answer, with no extra text."
    ),
    tools=[google_search],
)
124
 
125
# A specialized agent for image analysis.
# NOTE(review): no tools are configured, so the agent only receives the image
# URL as text in the prompt — confirm the model can actually access the image.
image_agent = LlmAgent(
    name='ImageAgent',
    model="gemini-1.5-flash-latest",  # Flash model has vision capabilities
    description="Analyzes an image to answer a question about its content. Responds with only the final, exact answer.",
    instruction=(
        "You are an expert image analyst. You will be given a question and a URL to an image. "
        "Analyze the image content to answer the question. "
        "Your final output must be only the answer, with no extra text."
    ),
)
136
 
137
# A specialized agent for video analysis.
# NOTE(review): no tools are configured, so the agent only receives the video
# URL as text in the prompt — confirm the model can actually access the video.
youtube_agent = LlmAgent(
    name='YouTubeAgent',
    model="gemini-1.5-flash-latest",  # Flash model has vision capabilities
    description="Watches a YouTube video to answer a question about its content. Responds with only the final, exact answer.",
    instruction=(
        "You are an expert video analyst. You will be given a question and a URL to a YouTube video. "
        "Analyze the video content to answer the question. "
        "Your final output must be only the answer, with no extra text."
    ),
)
148
 
149
# The main orchestrator agent.
# Its tools are the two API wrappers (fetch questions, submit answers) plus
# each specialist agent wrapped as an AgentTool; the instruction tells the
# model to fetch, delegate per question, collect answers and submit once.
root_agent = LlmAgent(
    name='OrchestratorAgent',
    model="gemini-1.5-pro-latest",  # Pro for robust orchestration
    description="Manages a team of specialized agents to answer a list of questions and submits them for scoring.",
    instruction=(
        "You are the project manager. Your goal is to answer a series of questions and submit them. "
        "1. **FETCH**: Start by using the `answer_questions` tool to get the list of all tasks. "
        "2. **DELEGATE**: For each task string, which contains a 'task_id:question', extract the task_id and the question. "
        " - Determine the best specialized agent for the job (Code, Search, Image, YouTube) based on the question and any file URLs. "
        " - Invoke that agent with the question and necessary context (like the file URL). "
        "3. **COLLECT**: Get the precise answer back from the specialist agent. Create a dictionary: `{'task_id': 'the_id', 'submitted_answer': 'the_answer'}`. The answer must be exact, without any extra formatting or text. "
        "4. **SUBMIT**: After processing all questions, gather all the answer dictionaries into a single list. Call the `submit_questions` tool with this list to complete the assignment."
    ),
    tools=[
        responses_api,
        submit_api,
        agent_tool.AgentTool(agent=code_agent),
        agent_tool.AgentTool(agent=search_agent),
        agent_tool.AgentTool(agent=image_agent),
        agent_tool.AgentTool(agent=youtube_agent),
    ],
)
172
 
173
# --- Application Runner ---

# Session store and the Runner that executes root_agent under APP_NAME.
# Both are created at import time and shared by run_agent_process().
session_service = InMemorySessionService()
runner = Runner(agent=root_agent, app_name=APP_NAME, session_service=session_service)
177
 
178
async def run_agent_process():
    """Create a session, send the kickoff prompt, and stream the agent's events.

    A session is created for APP_NAME / USER_ID / SESSION_ID, a single user
    message asking the orchestrator to fetch, answer and submit all questions
    is sent through the module-level ``runner``, and progress is printed as
    events arrive: the name of every tool that returned a result, and the
    orchestrator's final text response.
    """
    session = await session_service.create_session(
        app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID
    )
    # The ADK Session object exposes its identifier as `id`.
    print(f"===== Agent Process Started for session: {session.id} =====")

    initial_prompt = "Get all the questions, answer each one using your specialized agents, and submit the final list of answers for scoring."

    print(f"\nSending initial prompt to the Orchestrator Agent:\n'{initial_prompt}'")

    user_message = types.Content(role="user", parts=[types.Part(text=initial_prompt)])

    # run_async takes keyword-only user_id, session_id and new_message.
    async for event in runner.run_async(
        user_id=USER_ID,
        session_id=session.id,
        new_message=user_message,
    ):
        # Optional: Print events for debugging.
        # Tool results arrive as function responses attached to the event.
        for function_response in event.get_function_responses():
            print(f"\n<-- [Tool Output] from `{function_response.name}`")

        # Only the orchestrator's final answer is printed, not intermediate
        # model turns or sub-agent chatter.
        if event.is_final_response() and event.author == root_agent.name:
            if event.content and event.content.parts:
                print(f"\n[Orchestrator Response]: {event.content.parts[0].text}")

    print("\n===== Agent Process Finished =====")
204
 
205
async def main():
    """Main entry point for the application.

    Awaits run_agent_process() and returns once it completes.
    """
    await run_agent_process()
208
 
209
# Start the event loop and run main() only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())