leofltt committed
Commit 3dd74bd
1 Parent(s): 7737a19

lazy load image model

Files changed (1)
  1. app.py +26 -15
app.py CHANGED
@@ -43,6 +43,9 @@ Your final answer: FINAL ANSWER: Paris
 
 # --- Tool Definitions ---
 
+# Global variable to cache the image-to-text pipeline. This allows for "lazy loading".
+image_to_text_pipeline = None
+
 
 @tool
 def web_search(query: str):
@@ -66,14 +69,19 @@ def math_calculator(expression: str):
 
 @tool
 def image_analyzer(image_url: str):
-    """Analyzes an image and returns a description."""
+    """Analyzes an image and returns a description. Loads the model on first use."""
+    global image_to_text_pipeline
     print(f"--- Calling Image Analyzer Tool with URL: {image_url} ---")
     try:
-        # Using a CPU-friendly image-to-text model
-        image_to_text = pipeline(
-            "image-to-text", model="Salesforce/blip-image-captioning-base"
-        )
-        description = image_to_text(image_url)[0]["generated_text"]
+        if image_to_text_pipeline is None:
+            print("--- Initializing Image Analyzer pipeline for the first time... ---")
+            # Lazy-load the pipeline to conserve memory on startup
+            image_to_text_pipeline = pipeline(
+                "image-to-text", model="Salesforce/blip-image-captioning-base"
+            )
+            print("--- Image Analyzer pipeline initialized. ---")
+
+        description = image_to_text_pipeline(image_url)[0]["generated_text"]
         return description
     except Exception as e:
         return f"Error analyzing image: {e}"
@@ -112,7 +120,8 @@ class GaiaAgent:
         ]
 
         # Initialize the LLM
-        print("Loading LLM...")
+        print("Loading LLM... This may take a few minutes on first startup.")
+        # Using a smaller, CPU-friendly model to avoid memory issues on Hugging Face Spaces
         llm = HuggingFacePipeline.from_model_id(
             model_id="microsoft/Phi-3-mini-4k-instruct",
             task="text-generation",
@@ -121,12 +130,12 @@ class GaiaAgent:
                 "top_k": 50,
                 "temperature": 0.1,
                 "do_sample": False,
-                "torch_dtype": "auto",  # Let transformers figure out the best dtype
-                "device_map": "auto",
             },
-            trust_remote_code=True,  # Phi-3 requires this
+            torch_dtype="auto",
+            trust_remote_code=True,  # Required for Phi-3
+            device_map="auto",
         )
-        print("LLM loaded.")
+        print("LLM loaded successfully.")
 
         # Create the agent graph
         prompt = PromptTemplate(
@@ -142,7 +151,7 @@ Question: {question}
 
         self.agent = prompt | llm | StrOutputParser()
         self.graph = self._create_graph()
-        print("GaiaAgent initialized.")
+        print("GaiaAgent initialized successfully.")
 
     def _create_graph(self):
         graph = StateGraph(AgentState)
@@ -175,8 +184,8 @@ Question: {question}
         print("--- Calling Tools ---")
         raw_tool_call = state["messages"][-1]
 
-        # Simple regex to find tool calls like tool_name("argument")
-        tool_call_match = re.search(r"(\w+)\((.*?)\)", raw_tool_call)
+        # Simple regex to find tool calls like tool_name("argument") or tool_name(argument)
+        tool_call_match = re.search(r"(\w+)\s*\((.*?)\)", raw_tool_call, re.DOTALL)
         if not tool_call_match:
             return {"messages": ["No valid tool call found."], "sender": "tools"}
 
@@ -184,7 +193,9 @@ Question: {question}
         tool_input_str = tool_call_match.group(2).strip()
 
         # Remove quotes from the input string if they exist
-        if tool_input_str.startswith('"') and tool_input_str.endswith('"'):
+        if (tool_input_str.startswith('"') and tool_input_str.endswith('"')) or (
+            tool_input_str.startswith("'") and tool_input_str.endswith("'")
+        ):
             tool_input = tool_input_str[1:-1]
         else:
             tool_input = tool_input_str
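
For reference, the lazy-loading pattern this commit introduces can be reduced to a small standalone sketch. It assumes the transformers pipeline API used in the diff; describe_image and _image_to_text_pipeline are illustrative names only and do not appear in app.py.

from transformers import pipeline

# Module-level cache: the captioning model is loaded on first use, not at import time.
_image_to_text_pipeline = None

def describe_image(image_url: str) -> str:
    """Return a caption for the image, loading the BLIP model lazily."""
    global _image_to_text_pipeline
    if _image_to_text_pipeline is None:
        # The first call pays the download/load cost; later calls reuse the cached pipeline.
        _image_to_text_pipeline = pipeline(
            "image-to-text", model="Salesforce/blip-image-captioning-base"
        )
    return _image_to_text_pipeline(image_url)[0]["generated_text"]

Keeping the pipeline construction out of module import means the Space starts up without loading the BLIP weights; they are only pulled into memory if the image_analyzer tool is actually called.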