HF_Agents_Final_Assignment

Build error

App Files Files Community

leofltt committed on Jun 21

Commit

2f87e44

1 Parent(s): 8746908

new v

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +94 -125

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Template Final Assignment
 emoji: 🕵🏻‍♂️
 colorFrom: indigo
 colorTo: indigo

 ---
+title: HF Agents Course Final Assignment
 emoji: 🕵🏻‍♂️
 colorFrom: indigo
 colorTo: indigo

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py (Final Version)
 import os
 import re
@@ -9,28 +9,48 @@ import logging
 import numexpr
 from typing import TypedDict, Annotated
-# --- Langchain & HF Imports ---
 from langchain_huggingface import HuggingFaceEndpoint
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.tools import tool
 from langgraph.graph import StateGraph, END
 from langchain_community.document_loaders.youtube import YoutubeLoader
-from transformers.pipelines import pipeline as hf_pipeline  # Renamed to avoid conflict
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-SYSTEM_PROMPT = """You are a helpful and expert assistant named GAIA, designed to answer questions accurately. To do this, you have access to a set of tools. Based on the user's question, you must decide which tool to use, if any. Your process is:
-1.  **Analyze the Question**: Understand what is being asked.
-2.  **Select a Tool**: If necessary, choose the best tool. Your available tools are: `web_search`, `math_calculator`, `image_analyzer`, `youtube_transcript_reader`.
-3.  **Call the Tool**: Output a tool call in the format `tool_name("argument")`. For example: `web_search("what is the weather in Paris?")`.
-4.  **Analyze the Result**: Look at the tool's output.
-5.  **Final Answer**: If you have enough information, provide the final answer. If not, you can use another tool.
-When you have the final answer, you **must** output it in the following format, and nothing else:
-FINAL ANSWER: [YOUR FINAL ANSWER]"""
-# --- Tool Definitions ---
 image_to_text_pipeline = None
@@ -52,7 +72,6 @@ def math_calculator(expression: str) -> str:
         result = numexpr.evaluate(expression).item()
         return str(result)
     except Exception as e:
-        logging.error(f"Calculator error: {e}")
         return f"Error: {e}"
@@ -63,50 +82,33 @@ def image_analyzer(image_url: str) -> str:
     logging.info(f"--- Calling Image Analyzer Tool with URL: {image_url} ---")
     try:
         if image_to_text_pipeline is None:
-            logging.info(
-                "--- Initializing Image Analyzer pipeline (lazy loading)... ---"
-            )
             image_to_text_pipeline = hf_pipeline(
                 "image-to-text", model="Salesforce/blip-image-captioning-base"
             )
-            logging.info("--- Image Analyzer pipeline initialized. ---")
-        pipeline_output = image_to_text_pipeline(image_url)
-        if (
-            pipeline_output
-            and isinstance(pipeline_output, list)
-            and len(pipeline_output) > 0
-        ):
-            description = pipeline_output[0].get(
-                "generated_text", "Error: Could not generate text."
-            )
-        else:
-            description = "Error: Could not analyze image."
         return description
     except Exception as e:
-        logging.error(f"Error analyzing image: {e}")
         return f"Error analyzing image: {e}"
 @tool
 def youtube_transcript_reader(youtube_url: str) -> str:
     """Reads the transcript of a YouTube video from its URL."""
-    logging.info(
-        f"--- Calling YouTube Transcript Reader Tool with URL: {youtube_url} ---"
-    )
     try:
         loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
-        docs = loader.load()
-        transcript = " ".join([doc.page_content for doc in docs])
-        return transcript[:4000]
     except Exception as e:
-        logging.error(f"Error reading YouTube transcript: {e}")
-        return f"Error: {e}"
 class AgentState(TypedDict):
     question: str
     messages: Annotated[list, lambda x, y: x + y]
-    sender: str
 class GaiaAgent:
@@ -119,54 +121,27 @@ class GaiaAgent:
             youtube_transcript_reader,
         ]
-        # --- THIS IS THE CORRECTED LLM INITIALIZATION ---
-        logging.info("Initializing LLM via modern HuggingFaceEndpoint...")
         llm = HuggingFaceEndpoint(
-            repo_id="HuggingFaceH4/zephyr-7b-beta",
             temperature=0.1,
             max_new_tokens=1024,
-            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
         )
-        logging.info("LLM initialized successfully.")
-        # The rest of the class remains the same
-        prompt = PromptTemplate(
-            template=SYSTEM_PROMPT
-            + "\nHere is the current conversation:\n{messages}\n\nQuestion: {question}",
-            input_variables=["messages", "question"],
         )
         self.agent = prompt | llm | StrOutputParser()
         self.graph = self._create_graph()
         logging.info("GaiaAgent initialized successfully.")
-    def _create_graph(self):
-        graph = StateGraph(AgentState)
-        graph.add_node("agent", self._call_agent)
-        graph.add_node("tools", self._call_tools)
-        graph.add_conditional_edges(
-            "agent", self._decide_action, {END: END, "tools": "tools"}
-        )
-        graph.add_edge("tools", "agent")
-        graph.set_entry_point("agent")
-        return graph.compile()
     def _call_agent(self, state: AgentState):
         logging.info("--- Calling Agent ---")
-        message_history = "\n".join(state["messages"])
-        response = self.agent.invoke(
-            {"messages": message_history, "question": state["question"]}
-        )
-        return {"messages": [response], "sender": "agent"}
-    def _decide_action(self, state: AgentState):
-        logging.info("--- Deciding Action ---")
-        response = state["messages"][-1]
-        if "FINAL ANSWER:" in response:
-            return END
-        else:
-            return "tools"
     def _call_tools(self, state: AgentState):
         logging.info("--- Calling Tools ---")
@@ -176,57 +151,66 @@ class GaiaAgent:
             logging.warning("No valid tool call found in agent response.")
             return {
                 "messages": [
-                    'No valid tool call found. Please format your response as `tool_name("argument")` or provide a `FINAL ANSWER:`.'
-                ],
-                "sender": "tools",
             }
         tool_name = tool_call_match.group(1).strip()
-        tool_input_str = tool_call_match.group(2).strip()
-        if (tool_input_str.startswith('"') and tool_input_str.endswith('"')) or (
-            tool_input_str.startswith("'") and tool_input_str.endswith("'")
-        ):
-            tool_input = tool_input_str[1:-1]
-        else:
-            tool_input = tool_input_str
         tool_to_call = next((t for t in self.tools if t.name == tool_name), None)
         if tool_to_call:
             try:
-                result = tool_to_call.run(tool_input)
-                return {"messages": [str(result)], "sender": "tools"}
             except Exception as e:
-                logging.error(f"Error executing tool {tool_name}: {e}")
-                return {
-                    "messages": [f"Error executing tool {tool_name}: {e}"],
-                    "sender": "tools",
-                }
         else:
-            logging.warning(f"Tool '{tool_name}' not found.")
-            return {"messages": [f"Tool '{tool_name}' not found."], "sender": "tools"}
     def __call__(self, question: str) -> str:
         logging.info(f"Agent received question: {question[:100]}...")
         try:
-            initial_state = {"question": question, "messages": [], "sender": "user"}
             final_state = self.graph.invoke(initial_state, {"recursion_limit": 15})
             final_response = final_state["messages"][-1]
             match = re.search(
                 r"FINAL ANSWER:\s*(.*)", final_response, re.IGNORECASE | re.DOTALL
             )
-            if match:
-                extracted_answer = match.group(1).strip()
-                logging.info(f"Agent returning final answer: {extracted_answer}")
-                return extracted_answer
-            else:
-                logging.warning(
-                    "Agent could not find a final answer. Returning the last message."
-                )
-                return final_response
         except Exception as e:
             logging.error(f"Error during agent invocation: {e}", exc_info=True)
-            return f"Error during agent invocation: {e}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
         return "Please Login to Hugging Face.", None
     username = profile.username
@@ -249,13 +233,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
         response.raise_for_status()
         questions_data = response.json()
-        if not questions_data:
-            return "Fetched questions list is empty.", None
-        logging.info(f"Successfully fetched {len(questions_data)} questions.")
     except Exception as e:
         return f"Error fetching questions: {e}", None
-    # The loop will now process the full 'questions_data' list
     logging.info(
         f"FULL EVALUATION MODE: Processing all {len(questions_data)} questions..."
     )
@@ -304,34 +284,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         )
         response.raise_for_status()
         result_data = response.json()
-        status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
         return status, pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks() as demo:
     gr.Markdown("# GAIA Agent Evaluation Runner")
-    gr.Markdown(
-        "This agent uses LangGraph and Mistral-7B to answer questions from the GAIA benchmark."
-    )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(
-        label="Run Status / Submission Result", lines=5, interactive=False
-    )
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
     logging.basicConfig(
         level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
     )
-    logging.info("App Starting (Final Version)...")
     demo.launch()

+# app.py (Refactored for Improved Performance)
 import os
 import re
 import numexpr
 from typing import TypedDict, Annotated
+# --- Langchain & HF Imports (Modern and Correct) ---
 from langchain_huggingface import HuggingFaceEndpoint
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.tools import tool
 from langgraph.graph import StateGraph, END
+from langgraph.errors import GraphRecursionError
 from langchain_community.document_loaders.youtube import YoutubeLoader
+from transformers import pipeline as hf_pipeline  # Renamed to avoid conflict
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+### --- REFACTOR 1: A much stricter and more detailed System Prompt --- ###
+SYSTEM_PROMPT = """You are GAIA, a powerful expert assistant. You are designed to answer questions accurately and efficiently by using a set of available tools.
+**Your STRICT Process:**
+1.  **Analyze the User's Question:** Carefully determine the user's intent and what information is needed.
+2.  **Tool Selection and Execution:**
+    * **Is a tool necessary?**
+        * For questions about public information, facts, current events, statistics, people, companies, etc., you **MUST** use the `web_search` tool. Do not rely on your internal knowledge.
+        * If the question includes a URL pointing to an image (`.png`, `.jpg`, etc.), you **MUST** use the `image_analyzer` tool.
+        * If the question includes a YouTube URL, you **MUST** use the `youtube_transcript_reader` tool.
+        * If the question requires a calculation, you **MUST** use the `math_calculator` tool.
+        * If the question is a simple logic puzzle, riddle, or language task you can solve directly, you do not need a tool.
+    * **Tool Call Format:** To use a tool, you **MUST** respond with **only** the tool call on a single line. Do not add any other text or explanation.
+        * Example: `web_search("How many albums did Mercedes Sosa release after 2000?")`
+3.  **Analyze Tool Output:**
+    * Review the information returned by the tool.
+    * If you have enough information to answer the user's question, proceed to the final step.
+    * If you need more information, you may use another tool.
+4.  **Final Answer:**
+    * Once you have a definitive answer, you **MUST** format it as follows, and nothing else:
+        `FINAL ANSWER: [Your concise and accurate answer]`
+"""
+# --- Tool Definitions (logging trimmed; image analyzer and transcript reader simplified) ---
 image_to_text_pipeline = None
         result = numexpr.evaluate(expression).item()
         return str(result)
     except Exception as e:
         return f"Error: {e}"
     logging.info(f"--- Calling Image Analyzer Tool with URL: {image_url} ---")
     try:
         if image_to_text_pipeline is None:
+            logging.info("--- Initializing Image Analyzer pipeline... ---")
             image_to_text_pipeline = hf_pipeline(
                 "image-to-text", model="Salesforce/blip-image-captioning-base"
             )
+        description = image_to_text_pipeline(image_url)[0].get(
+            "generated_text", "Error"
+        )
         return description
     except Exception as e:
         return f"Error analyzing image: {e}"
 @tool
 def youtube_transcript_reader(youtube_url: str) -> str:
     """Reads the transcript of a YouTube video from its URL."""
+    logging.info(f"--- Calling YouTube Transcript Reader with URL: {youtube_url} ---")
     try:
         loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
+        return " ".join([doc.page_content for doc in loader.load()])[:4000]
     except Exception as e:
+        return f"Error reading YouTube transcript: {e}"
+# --- Agent State & Graph (the `sender` field was dropped from AgentState) ---
 class AgentState(TypedDict):
     question: str
     messages: Annotated[list, lambda x, y: x + y]
 class GaiaAgent:
             youtube_transcript_reader,
         ]
+        ### --- REFACTOR 2: Switched to the more powerful Mistral-7B model --- ###
+        # IMPORTANT: Make sure you have accepted the terms of use for this model on the Hugging Face Hub!
+        logging.info("Initializing LLM with Mistral-7B...")
         llm = HuggingFaceEndpoint(
+            repo_id="mistralai/Mistral-7B-Instruct-v0.2",
             temperature=0.1,
             max_new_tokens=1024,
+            huggingface_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
         )
+        prompt = PromptTemplate.from_template(
+            SYSTEM_PROMPT + "\n{messages}\n\nQuestion: {question}"
         )
         self.agent = prompt | llm | StrOutputParser()
         self.graph = self._create_graph()
         logging.info("GaiaAgent initialized successfully.")
     def _call_agent(self, state: AgentState):
         logging.info("--- Calling Agent ---")
+        response = self.agent.invoke(state)
+        return {"messages": [response]}
     def _call_tools(self, state: AgentState):
         logging.info("--- Calling Tools ---")
             logging.warning("No valid tool call found in agent response.")
             return {
                 "messages": [
+                    "No valid tool call found. Please try again or provide a FINAL ANSWER."
+                ]
             }
         tool_name = tool_call_match.group(1).strip()
+        tool_input_str = tool_call_match.group(2).strip().strip("'\"")
         tool_to_call = next((t for t in self.tools if t.name == tool_name), None)
         if tool_to_call:
             try:
+                result = tool_to_call.run(tool_input_str)
+                return {"messages": [str(result)]}
             except Exception as e:
+                return {"messages": [f"Error executing tool {tool_name}: {e}"]}
         else:
+            return {
+                "messages": [
+                    f"Tool '{tool_name}' not found. Available tools: web_search, math_calculator, image_analyzer, youtube_transcript_reader."
+                ]
+            }
+    def _decide_action(self, state: AgentState):
+        return "tools" if "FINAL ANSWER:" not in state["messages"][-1] else END
+    def _create_graph(self):
+        graph = StateGraph(AgentState)
+        graph.add_node("agent", self._call_agent)
+        graph.add_node("tools", self._call_tools)
+        graph.add_conditional_edges(
+            "agent", self._decide_action, {"tools": "tools", END: END}
+        )
+        graph.add_edge("tools", "agent")
+        graph.set_entry_point("agent")
+        return graph.compile()
     def __call__(self, question: str) -> str:
         logging.info(f"Agent received question: {question[:100]}...")
         try:
+            initial_state = {"question": question, "messages": []}
+            # REFACTOR 3: a GraphRecursionError raised when the recursion limit is hit is caught below and reported as a loop error.
             final_state = self.graph.invoke(initial_state, {"recursion_limit": 15})
             final_response = final_state["messages"][-1]
             match = re.search(
                 r"FINAL ANSWER:\s*(.*)", final_response, re.IGNORECASE | re.DOTALL
             )
+            return (
+                match.group(1).strip() if match else "Could not determine final answer."
+            )
+        except GraphRecursionError:
+            logging.error("Agent got stuck in a loop.")
+            return "Agent Error: Stuck in a loop."
         except Exception as e:
             logging.error(f"Error during agent invocation: {e}", exc_info=True)
+            return f"Error: {e}"
+# --- Main Application Logic (status message shortened) ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    # Runs the full evaluation: fetches the question list, processes all questions,
+    # and submits the answers to the scoring API.
     if not profile:
         return "Please Login to Hugging Face.", None
     username = profile.username
         response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
         response.raise_for_status()
         questions_data = response.json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
     logging.info(
         f"FULL EVALUATION MODE: Processing all {len(questions_data)} questions..."
     )
         )
         response.raise_for_status()
         result_data = response.json()
+        status = f"Submission Successful!\nScore: {result_data.get('score', 'N/A')}%"
         return status, pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
+# --- Gradio Interface (button and output labels updated) ---
 with gr.Blocks() as demo:
     gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.LoginButton()
+    run_button = gr.Button("Run Full Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Result", lines=4)
+    results_table = gr.DataFrame(label="Questions and Answers", wrap=True)
     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
     logging.basicConfig(
         level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
     )
     demo.launch()