Agents_Course_Final_Assignment_Ashish

Sleeping

App Files Files Community

ashishja committed on Jun 30

Commit

7d1b068

verified ·

1 Parent(s): b76db19

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -118

app.py CHANGED Viewed

@@ -1,195 +1,210 @@
-from zoneinfo import ZoneInfo
-from google.adk.agents import Agent,BaseAgent,LlmAgent
-from google.adk.tools import google_search
-from google.adk.runners import Runner
-from google.adk.sessions import InMemorySessionService
-from google.genai import types
 import google.genai.types as types
 import requests
-from google.adk.events import Event, EventActions
 from google.adk.agents.invocation_context import InvocationContext
-from typing import AsyncGenerator, Dict, Any
-from google.genai import types as genai_types
-from google.adk.tools import ToolContext, FunctionTool
-import logging
-#from google.adk.tools import built_in_code_execution
-from google.adk.tools import agent_tool
-import asyncio
 logging.basicConfig(level=logging.ERROR)
-url = 'https://agents-course-unit4-scoring.hf.space/questions'
-headers = {'accept': 'application/json'}
-response = requests.get(url, headers=headers)
-def answer_questions():
-    """Fetches questions from the scoring API."""
     url = 'https://agents-course-unit4-scoring.hf.space/questions'
     headers = {'accept': 'application/json'}
-    response = requests.get(url, headers=headers)
-    response.raise_for_status()  # Raise an exception for bad status codes
-    prompts = []
-    for i in response.json():
-        task_id = i['task_id']
-        if i['file_name']:
-            url_file = f"https://agents-course-unit4-scoring.hf.space/files/{i['task_id']}"
-            question = i['question']
-            prompt = f"{task_id}:{question} and the file is {url_file}, give the final answer only"
-        else:
-            question = i['question']
-            prompt = f"{task_id}:{question} give the final answer only"
-        prompts.append(prompt)
-    return prompts
 def submit_questions(answers: list[Dict[str, Any]]) -> Dict[str, Any]:
-    """Submits the collected answers to the scoring API."""
     url = 'https://agents-course-unit4-scoring.hf.space/submit'
     payload = {
-        "username": "ashishja",
-        "agent_code": "https://huggingface.co/spaces/ashishja/Agents_Course_Final_Assignment_Ashish",
         "answers": answers
     }
     headers = {'accept': 'application/json', "Content-Type": "application/json"}
-    response = requests.post(url, headers=headers, json=payload)
-    import json
-    print("Submitting the following payload:")
-    print(json.dumps(payload, indent=2))
-    if response.status_code == 200:
         print("Submission successful!")
         return response.json()
-    else:
-        print(f"Submission failed with status {response.status_code}: {response.text}")
-        response.raise_for_status()
-responses_api = FunctionTool(func=answer_questions)
-submit_api = FunctionTool(func=submit_questions)
-# class QuestionAnswerer(LlmAgent):
-#     async def _run_async_impl(self, ctx: InvocationContext) -> AsyncGenerator[Event, None]:
-#         questions_to_answer = ctx.session_service.get('fetched_questions', [])
-#         for q in questions_to_answer:
-#             answer = await self._llm(messages=[types.ChatMessage(role="user", parts=[types.Part(text=q)])])
-#             yield Event(author=self.name, content=answer.content)
-#
-# qa = QuestionAnswerer(name = 'qa_1', model="gemini-1.5-flash-latest", description="Question Answerer")
-APP_NAME="final_assignment_agent"
-USER_ID="user1234"
-SESSION_ID="5678"
 code_agent = LlmAgent(
-    name='codegaiaAgent',
-    model="gemini-2.0-flash",
-    description=(
-        "You are a smart agent that can write and execute code to answer questions. Use this for questions involving code files (.py) or data files (.csv, .xlsx, .json, .txt)."
-    ),
     instruction=(
-        "If the question contains a file with .py, get the code file and, depending on the question and the file provided, execute the code and provide the final answer. "
-        "If the question contains a spreadsheet file like .xlsx or .csv, get the file, use pandas to analyze it, and provide the final answer. "
-        "If the question contains a file with .txt or .json, get the file and use code to parse it and answer the question. "
-        "Always use the code execution tool to run your code and provide only the final answer."
     ),
-    #tools=[built_in_code_execution],
 )
 search_agent = LlmAgent(
-    name='searchgaiaAgent',
-    model="gemini-2.0-flash",
-    description=(
-        "You are a smart agent that can search the web to answer questions."
-    ),
     instruction=(
-        "Get the URL associated with the question, perform a web search, consolidate the information, and answer the provided question."
     ),
     tools=[google_search],
 )
 image_agent = LlmAgent(
-    name='imagegaiaAgent',
-    model="gemini-2.0-flash",
-    description=(
-        "You are a smart agent that can analyze an image file and answer any questions related to it."
-    ),
     instruction=(
-        "Get the image file from the link provided in the prompt. Use your multimodal capabilities to understand the image and answer the question."
     ),
 )
 youtube_agent = LlmAgent(
-    name='youtubegaiaAgent',
-    model="gemini-2.0-flash",
-    description=(
-        "You are a smart agent that can watch a YouTube video and answer any questions related to it."
-    ),
     instruction=(
-        "Get the YouTube link from the prompt. Use your multimodal capabilities to watch the video and answer the provided question."
     ),
 )
 root_agent = LlmAgent(
-    name='basegaiaAgent',
-    model="gemini-2.0-flash",
-    description=(
-        "You are a master agent that orchestrates sub-agents to answer various types of questions."
-    ),
     instruction=(
-        "You are a helpful orchestrator agent. Your primary goal is to answer a series of questions and submit them. "
-        "First, invoke your tool 'answer_questions' to retrieve the list of questions. "
-        "Once you receive the list, iterate through each question. For each one, delegate to the most appropriate sub-agent (code, search, youtube, image) based on its description and the question's content (e.g., file type). "
-        "After getting the answer from the sub-agent, format it into a dictionary with 'task_id' and 'submitted_answer' keys. The task_id is at the beginning of each question string, separated by a colon. "
-        "Collect all these answer dictionaries into a single list. "
-        "Finally, pass this complete list of dictionaries to the 'submit_questions' tool to submit all answers at once."
     ),
     tools=[
         responses_api,
         submit_api,
         agent_tool.AgentTool(agent=code_agent),
         agent_tool.AgentTool(agent=search_agent),
         agent_tool.AgentTool(agent=youtube_agent),
-        agent_tool.AgentTool(agent=image_agent)
     ],
 )
 session_service = InMemorySessionService()
 runner = Runner(agent=root_agent, app_name=APP_NAME, session_service=session_service)
-async def process_questions_and_answer():
-    """
-    Orchestrates the entire process of fetching questions, answering them
-    using the agent, and submitting the final answers.
-    """
     session = await session_service.create_session(
         app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID
     )
-    print(f"===== Application Startup at {session.create_time} =====")
-    print(f"Session created: {session.session_id}")
-    # Initial prompt to kick off the agent's task
-    initial_prompt = "Please get all the questions, answer each one by delegating to the correct tool or sub-agent, format the answers, and then submit the final list."
-    print("\nSending initial prompt to the agent...")
-    print(f"Prompt: '{initial_prompt}'")
-    # Run the agent and stream events
     async for event in runner.run_async(
         session_id=session.session_id,
         content=types.Content(role="user", parts=[types.Part(text=initial_prompt)]),
     ):
         if event.action == EventActions.AGENT_RESPONSE and event.author == root_agent.name:
             if event.content and event.content.parts:
-                print(f"\nFinal Agent Response: {event.content.parts[0].text}")
         elif event.action == EventActions.TOOL_OUTPUT:
              if event.content and event.content.parts and event.content.parts[0].tool_output:
                 tool_output = event.content.parts[0].tool_output
-                print(f"\n<-- Tool Output from {tool_output.tool_name}:")
-                for key, value in tool_output.data.items():
-                    print(f"    {key}: {value}")
-    print("\n===== Task Complete =====")
 async def main():
     """Main entry point for the application."""
-    await process_questions_and_answer()
 if __name__ == "__main__":
     asyncio.run(main())

+import asyncio
+import json
+import logging
+from typing import AsyncGenerator, Dict, Any
 import google.genai.types as types
 import requests
+from google.adk.agents import BaseAgent, LlmAgent
 from google.adk.agents.invocation_context import InvocationContext
+from google.adk.events import Event, EventActions
+from google.adk.runners import Runner
+from google.adk.sessions import InMemorySessionService
+from google.adk.tools import (
+    FunctionTool,
+    ToolContext,
+    agent_tool,
+    # built_in_code_execution,
+    google_search,
+)
+# Configure logging to suppress verbose output
 logging.basicConfig(level=logging.ERROR)
+# --- API Interaction Functions ---
+def answer_questions() -> list[str]:
+    """
+    Fetches the full list of evaluation questions from the scoring API.
+    Each question is formatted with its task_id.
+    """
+    print("Attempting to fetch questions from the API...")
     url = 'https://agents-course-unit4-scoring.hf.space/questions'
     headers = {'accept': 'application/json'}
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()  # Raise an exception for bad status codes
+        questions_data = response.json()
+        print(f"Successfully fetched {len(questions_data)} questions.")
+        prompts = []
+        for item in questions_data:
+            task_id = item['task_id']
+            question_text = item['question']
+            if item.get('file_name'):
+                file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+                prompt = f"{task_id}:{question_text} The URL for the associated file is: {file_url}"
+            else:
+                prompt = f"{task_id}:{question_text}"
+            prompts.append(prompt)
+        return prompts
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return []
 def submit_questions(answers: list[Dict[str, Any]]) -> Dict[str, Any]:
+    """
+    Submits the collected answers to the scoring API.
+    Args:
+        answers: A list of dictionaries, where each dictionary contains
+                 a 'task_id' and a 'submitted_answer'.
+    """
+    # !!! IMPORTANT !!!
+    # REPLACE the username and agent_code with your own details.
+    username = "YOUR_HUGGING_FACE_USERNAME"
+    agent_code_url = "https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME/tree/main"
+    print(f"Attempting to submit {len(answers)} answers for user '{username}'...")
     url = 'https://agents-course-unit4-scoring.hf.space/submit'
     payload = {
+        "username": username,
+        "agent_code": agent_code_url,
         "answers": answers
     }
     headers = {'accept': 'application/json', "Content-Type": "application/json"}
+    try:
+        response = requests.post(url, headers=headers, json=payload)
+        response.raise_for_status()
         print("Submission successful!")
+        print("Response:", response.json())
         return response.json()
+    except requests.exceptions.RequestException as e:
+        print(f"Error submitting answers: {e}")
+        print(f"Response Body: {e.response.text if e.response else 'No response'}")
+        raise
+# Wrap API functions in ADK Tools
+responses_api = FunctionTool(func=answer_questions, description="Fetches all questions from the remote server.")
+submit_api = FunctionTool(func=submit_questions, description="Submits the final list of answers to the remote server for scoring.")
+# --- Agent Definitions ---
+APP_NAME = "gaia_challenge_agent"
+USER_ID = "test_user"
+SESSION_ID = "main_session"
+# A specialized agent for tasks requiring code execution or data analysis
 code_agent = LlmAgent(
+    name='CodeAgent',
+    model="gemini-1.5-pro-latest", # Using Pro for complex code generation
+    description="Executes code and analyzes data files (.csv, .xlsx, .json, .py) to answer a question. Responds with only the final, exact answer.",
     instruction=(
+        "You are an expert in data analysis and code execution. Given a question and a file URL, "
+        "write Python code to find the answer. "
+        "Use pandas for data files. Fetch remote files using requests. "
+        "Your final output must be only the answer to the question, with no extra text or explanation."
     ),
+    # tools=[built_in_code_execution],  # NOTE(review): disabled, so this agent is created without any code-execution tool even though its instruction asks it to run code — confirm intended
 )
+# A specialized agent for web searches
 search_agent = LlmAgent(
+    name='SearchAgent',
+    model="gemini-1.5-flash-latest", # Flash is efficient for search-and-answer
+    description="Searches the web to answer questions about current events, facts, or general knowledge. Responds with only the final, exact answer.",
     instruction=(
+        "You are an expert web researcher. You will be given a question. "
+        "Use your search tool to find the most accurate information. "
+        "Synthesize the findings and provide a concise, direct answer to the question. "
+        "Your final output must be only the answer, with no extra text."
     ),
     tools=[google_search],
 )
+# A specialized agent for image analysis (no tools are configured; per its instruction it is given the image as a URL in the prompt — TODO confirm the model can actually retrieve that URL)
 image_agent = LlmAgent(
+    name='ImageAgent',
+    model="gemini-1.5-flash-latest", # Flash model has vision capabilities
+    description="Analyzes an image to answer a question about its content. Responds with only the final, exact answer.",
     instruction=(
+        "You are an expert image analyst. You will be given a question and a URL to an image. "
+        "Analyze the image content to answer the question. "
+        "Your final output must be only the answer, with no extra text."
     ),
 )
+# A specialized agent for video analysis (no tools are configured; per its instruction it is given the video as a YouTube URL in the prompt — TODO confirm the model can actually access that URL)
 youtube_agent = LlmAgent(
+    name='YouTubeAgent',
+    model="gemini-1.5-flash-latest", # Flash model has vision capabilities
+    description="Watches a YouTube video to answer a question about its content. Responds with only the final, exact answer.",
     instruction=(
+        "You are an expert video analyst. You will be given a question and a URL to a YouTube video. "
+        "Analyze the video content to answer the question. "
+        "Your final output must be only the answer, with no extra text."
     ),
 )
+# The main orchestrator agent: it is given the two scoring-API tools directly and each specialist agent wrapped as an AgentTool
 root_agent = LlmAgent(
+    name='OrchestratorAgent',
+    model="gemini-1.5-pro-latest", # Pro for robust orchestration
+    description="Manages a team of specialized agents to answer a list of questions and submits them for scoring.",
     instruction=(
+        "You are the project manager. Your goal is to answer a series of questions and submit them. "
+        "1. **FETCH**: Start by using the `answer_questions` tool to get the list of all tasks. "
+        "2. **DELEGATE**: For each task string, which contains a 'task_id:question', extract the task_id and the question. "
+        "   - Determine the best specialized agent for the job (Code, Search, Image, YouTube) based on the question and any file URLs. "
+        "   - Invoke that agent with the question and necessary context (like the file URL). "
+        "3. **COLLECT**: Get the precise answer back from the specialist agent. Create a dictionary: `{'task_id': 'the_id', 'submitted_answer': 'the_answer'}`. The answer must be exact, without any extra formatting or text. "
+        "4. **SUBMIT**: After processing all questions, gather all the answer dictionaries into a single list. Call the `submit_questions` tool with this list to complete the assignment."
     ),
     tools=[
         responses_api,
         submit_api,
         agent_tool.AgentTool(agent=code_agent),
         agent_tool.AgentTool(agent=search_agent),
+        agent_tool.AgentTool(agent=image_agent),
         agent_tool.AgentTool(agent=youtube_agent),
     ],
 )
+# --- Application Runner ---
 session_service = InMemorySessionService()
 runner = Runner(agent=root_agent, app_name=APP_NAME, session_service=session_service)
+async def run_agent_process():
+    """Initializes a session and runs the agent's main task."""
     session = await session_service.create_session(
         app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID
     )
+    # Corrected the print statement below
+    print(f"===== Agent Process Started for session: {session.session_id} =====")
+    initial_prompt = "Get all the questions, answer each one using your specialized agents, and submit the final list of answers for scoring."
+    print(f"\nSending initial prompt to the Orchestrator Agent:\n'{initial_prompt}'")
     async for event in runner.run_async(
         session_id=session.session_id,
         content=types.Content(role="user", parts=[types.Part(text=initial_prompt)]),
     ):
+        # Optional: Print events for debugging
         if event.action == EventActions.AGENT_RESPONSE and event.author == root_agent.name:
             if event.content and event.content.parts:
+                print(f"\n[Orchestrator Response]: {event.content.parts[0].text}")
         elif event.action == EventActions.TOOL_OUTPUT:
              if event.content and event.content.parts and event.content.parts[0].tool_output:
                 tool_output = event.content.parts[0].tool_output
+                print(f"\n<-- [Tool Output] from `{tool_output.tool_name}`")
+    print("\n===== Agent Process Finished =====")
 async def main():
     """Main entry point for the application: awaits run_agent_process()."""
+    await run_agent_process()
 if __name__ == "__main__":
     asyncio.run(main())