HF_Agents_Final_Assignment

Build error

App Files Files Community

leofltt committed on Jun 17

Commit

97a46b7

1 Parent(s): 070630f

fix reqs & app

Browse files

Files changed (2) hide show

app.py +35 -64
requirements.txt +3 -3

app.py CHANGED Viewed

@@ -5,12 +5,12 @@ import pandas as pd
 import torch
 import base64
 from io import BytesIO
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
-from llama_index.tools.python_repl import PythonREPLTool
 from youtube_transcript_api import YouTubeTranscriptApi
 from PIL import Image
@@ -45,60 +45,34 @@ def analyze_image_url(image_url: str, question: str):
         return (
             "Error: Hugging Face token is not set. Cannot use the image analysis tool."
         )
     try:
-        # Download image
         response = requests.get(image_url)
         response.raise_for_status()
-        # Prepare data for the Inference API
         image_bytes = BytesIO(response.content).getvalue()
-        # Call Inference API
-        headers = {
-            "Authorization": f"Bearer {HF_TOKEN}",
-            "Content-Type": "image/png",  # Specify content type
-        }
-        # The Llava prompt format is specific
-        prompt = f"USER: <image>\n{question}\nASSISTANT:"
-        # To send both image and text, we can't use a simple JSON payload.
-        # A common approach is to use a multi-part form, but the HF API
-        # can be tricky. Let's try a different model that supports image url directly if available,
-        # or stick to a method that works with its API.
-        # For llava, sending the raw image data is the documented way.
-        # Re-checking llava API documentation for combined prompt/image...
-        # The API doesn't cleanly support separate text prompts with raw image data posts.
-        # A workaround is to embed the prompt in the image or use a model designed for this API format.
-        # Let's pivot to a model that explicitly takes a URL or a simpler payload.
-        # However, to keep it simple, we'll assume the prompt is simple enough.
-        # Let's simplify the tool's goal: describe the image, then the LLM can reason on the description.
-        # This is a more robust pattern than trying to force a complex prompt into an API.
-        description_prompt = "USER: <image>\nDescribe this image in detail.\nASSISTANT:"
-        # For the sake of this example, we will stick to the documented behavior
-        # and assume the `question` can be answered from a general description.
         response = requests.post(
             IMAGE_ANALYSIS_API_URL, headers=headers, data=image_bytes
         )
         response.raise_for_status()
         result = response.json()
         generated_text = result[0].get("generated_text", "").strip()
         final_answer = generated_text.split("ASSISTANT:")[-1].strip()
-        # The agent will get the description, then re-evaluate with the original question.
         return f"The image description is: {final_answer}. Now, answer the original question based on this."
     except Exception as e:
         return f"Error analyzing image: {e}"
 # --- Tool Definitions ---
 youtube_tool = FunctionTool.from_defaults(
     fn=get_video_transcript,
@@ -110,40 +84,39 @@ image_analyzer_tool = FunctionTool.from_defaults(
     name="image_analyzer_tool",
     description="Use this tool to analyze an image when you are given a URL. Provide both the image URL and the question about the image.",
 )
-python_repl_tool = PythonREPLTool()
 # --- LlamaIndex Agent Definition ---
 class LlamaIndexAgent:
     def __init__(self):
         print("Initializing LlamaIndexAgent with Final Tools...")
         ddg_spec = DuckDuckGoSearchToolSpec()
         self.tools = [
             youtube_tool,
             image_analyzer_tool,
-            python_repl_tool,
         ] + ddg_spec.to_tool_list()
         system_prompt = """
         You are a helpful assistant tasked with answering questions.
         You have access to a set of tools to help you. These tools include:
         - A web search tool.
         - A YouTube video transcriber.
-        - An image analyzer for URLs (this tool provides a description of the image).
-        - A Python code interpreter for math and calculations.
         Use a tool if it is helpful. When you have the final answer, you MUST use the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
         YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
         """
         self.llm = HuggingFaceLLM(
             model_name="HuggingFaceH4/zephyr-7b-beta",
             tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
             device_map="auto",
             model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
         )
         self.agent = ReActAgent.from_tools(
             tools=self.tools, llm=self.llm, verbose=True, system_prompt=system_prompt
         )
@@ -153,7 +126,6 @@ class LlamaIndexAgent:
         print(f"Agent received question: {question[:80]}...")
         response = self.agent.chat(question)
         answer = str(response).strip()
         if "FINAL ANSWER:" in answer:
             final_answer = answer.split("FINAL ANSWER:")[-1].strip()
         else:
@@ -171,31 +143,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             "ERROR: The `HF_TOKEN` secret is not set in this Space. The image analysis tool will fail. Please set it in Settings > Secrets.",
             None,
         )
     space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
     else:
         return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     try:
         agent = LlamaIndexAgent()
     except Exception as e:
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
@@ -224,16 +191,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                     "Submitted Answer": f"AGENT ERROR: {e}",
                 }
             )
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload,
     }
     try:
         response = requests.post(submit_url, json=submission_data, timeout=180)
         response.raise_for_status()
@@ -252,15 +216,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         )
-# --- Build Gradio Interface ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Final, Fully-Featured GAIA Agent")
     gr.Markdown(
         """
-        **Agent Capabilities:** Web Search, YouTube Analysis, Image Analysis (via API), and Python Code Execution.
-        1.  **IMPORTANT**: This Space requires a Hugging Face Token to be set in the secrets as `HF_TOKEN` for the image analysis tool to work.
-        2.  Log in to your Hugging Face account using the button below.
-        3.  Click 'Run Evaluation & Submit All Answers'. This process is complex and will take a very long time.
         """
     )
     gr.LoginButton()

 import torch
 import base64
 from io import BytesIO
+import numexpr  # Using a dedicated and safe math library
 from llama_index.core.tools import FunctionTool
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from youtube_transcript_api import YouTubeTranscriptApi
 from PIL import Image
         return (
             "Error: Hugging Face token is not set. Cannot use the image analysis tool."
         )
     try:
         response = requests.get(image_url)
         response.raise_for_status()
         image_bytes = BytesIO(response.content).getvalue()
+        headers = {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "image/png"}
         response = requests.post(
             IMAGE_ANALYSIS_API_URL, headers=headers, data=image_bytes
         )
         response.raise_for_status()
         result = response.json()
         generated_text = result[0].get("generated_text", "").strip()
         final_answer = generated_text.split("ASSISTANT:")[-1].strip()
         return f"The image description is: {final_answer}. Now, answer the original question based on this."
     except Exception as e:
         return f"Error analyzing image: {e}"
+# NEW: math tool for the agent. Takes an expression string, evaluates it with numexpr, and returns the scalar result (or an error string if evaluation raises).
+def evaluate_math_expression(expression: str):
+    """Evaluates a mathematical expression safely."""
+    try:
+        # numexpr.evaluate() parses and computes the expression string; .item() unwraps the result into a plain Python scalar. NOTE(review): how "safe" this is depends on the installed numexpr version's input sanitization -- confirm.
+        result = numexpr.evaluate(expression).item()
+        return result
+    except Exception as e:
+        return f"Error evaluating expression: {e}"
 # --- Tool Definitions ---
 youtube_tool = FunctionTool.from_defaults(
     fn=get_video_transcript,
     name="image_analyzer_tool",
     description="Use this tool to analyze an image when you are given a URL. Provide both the image URL and the question about the image.",
 )
+math_tool = FunctionTool.from_defaults(
+    fn=evaluate_math_expression,
+    name="math_evaluator_tool",
+    description="Use this tool to evaluate simple mathematical expressions (e.g., '3 * (4 + 2)').",
+)
 # --- LlamaIndex Agent Definition ---
 class LlamaIndexAgent:
     def __init__(self):
         print("Initializing LlamaIndexAgent with Final Tools...")
         ddg_spec = DuckDuckGoSearchToolSpec()
         self.tools = [
             youtube_tool,
             image_analyzer_tool,
+            math_tool,
         ] + ddg_spec.to_tool_list()
         system_prompt = """
         You are a helpful assistant tasked with answering questions.
         You have access to a set of tools to help you. These tools include:
         - A web search tool.
         - A YouTube video transcriber.
+        - An image analyzer for URLs.
+        - A safe calculator for mathematical expressions.
         Use a tool if it is helpful. When you have the final answer, you MUST use the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
         YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
         """
         self.llm = HuggingFaceLLM(
             model_name="HuggingFaceH4/zephyr-7b-beta",
             tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
             device_map="auto",
             model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
         )
         self.agent = ReActAgent.from_tools(
             tools=self.tools, llm=self.llm, verbose=True, system_prompt=system_prompt
         )
         print(f"Agent received question: {question[:80]}...")
         response = self.agent.chat(question)
         answer = str(response).strip()
         if "FINAL ANSWER:" in answer:
             final_answer = answer.split("FINAL ANSWER:")[-1].strip()
         else:
             "ERROR: The `HF_TOKEN` secret is not set in this Space. The image analysis tool will fail. Please set it in Settings > Secrets.",
             None,
         )
     space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
     else:
         return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     try:
+        # We instantiate our new powerful agent instead of the BasicAgent
         agent = LlamaIndexAgent()
     except Exception as e:
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
                     "Submitted Answer": f"AGENT ERROR: {e}",
                 }
             )
     if not answers_payload:
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload,
     }
     try:
         response = requests.post(submit_url, json=submission_data, timeout=180)
         response.raise_for_status()
         )
+# --- Build Gradio Interface using Blocks ---
+# UI HAS BEEN REVERTED TO THE INITIAL TEMPLATE AS REQUESTED
 with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
+        **Instructions:**
+        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        ---
+        **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
     gr.LoginButton()

requirements.txt CHANGED Viewed

@@ -6,9 +6,9 @@ torch
 transformers
 accelerate
 bitsandbytes
-# Dependencies for tools
 youtube-transcript-api
 beautifulsoup4
 llama-index-tools-duckduckgo
-# CORRECTED: The missing package for HuggingFaceLLM
-llama-index-llms-huggingface

 transformers
 accelerate
 bitsandbytes
 youtube-transcript-api
 beautifulsoup4
 llama-index-tools-duckduckgo
+llama-index-llms-huggingface
+# A reliable library for safe math evaluation
+numexpr