leofltt committed on
Commit
17619f0
·
1 Parent(s): 45921e7
Files changed (1) hide show
  1. agent.py +39 -23
agent.py CHANGED
@@ -25,14 +25,20 @@ import torchaudio
25
  from langgraph.graph import StateGraph, END, START
26
  from langgraph.prebuilt import ToolNode
27
  from typing import TypedDict, Annotated, List, Union
28
- from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, BaseMessage
 
 
 
 
 
 
29
  import operator
30
  import json
31
  from langchain_community.llms import LlamaCpp
32
  from huggingface_hub import hf_hub_download
33
 
34
  # (Keep Constants as is)
35
- # --- Constants -- -
36
  default_api_url = "https://agents-course-unit4-scoring.hf.space"
37
 
38
 
@@ -60,12 +66,12 @@ def python_interpreter(code: str) -> str:
60
  def file_reader_tool(url: str) -> str:
61
  """
62
  Reads the content of a file from a URL and returns the entire content as a string.
63
- It can handle Excel (.xlsx, .xls) and CSV (.csv) files.
64
  The URL must point directly to the file.
65
  """
66
  try:
67
  headers = {
68
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
69
  }
70
  # Download the file content
71
  response = requests.get(url, headers=headers)
@@ -120,7 +126,7 @@ def youtube_transcript_tool(url: str) -> str:
120
  ytt_api = YouTubeTranscriptApi()
121
  try:
122
  transcript_list = ytt_api.list(video_id)
123
- transcript = transcript_list.find_transcript(['en']).fetch()
124
  transcript_text = " ".join([item["text"] for item in transcript])
125
  return transcript_text
126
  except NoTranscriptFound:
@@ -136,7 +142,7 @@ def web_reader_tool(url: str) -> str:
136
  """
137
  try:
138
  headers = {
139
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
140
  }
141
  response = requests.get(url, headers=headers)
142
  response.raise_for_status()
@@ -225,7 +231,9 @@ def image_analyzer_tool(url: str) -> str:
225
  try:
226
  print(f"Analyzing image from URL: {url}")
227
  # Initialize the image captioning pipeline
228
- captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
 
 
229
 
230
  # Analyze the image
231
  caption = captioner(url)
@@ -233,12 +241,14 @@ def image_analyzer_tool(url: str) -> str:
233
  except Exception as e:
234
  return f"Error analyzing image: {e}"
235
 
 
236
  class AgentState(TypedDict):
237
  input: str
238
  chat_history: list[BaseMessage]
239
  agent_outcome: Union[AgentAction, AgentFinish, None]
240
  intermediate_steps: Annotated[list[tuple[AgentAction, str]], operator.add]
241
 
 
242
  # --- Basic Agent Definition ---
243
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
244
  class BasicAgent:
@@ -261,9 +271,10 @@ class BasicAgent:
261
 
262
  self.llm = LlamaCpp(
263
  model_path=model_path,
264
- n_ctx=4096, # Context window size
265
- n_gpu_layers=0, # Set to 0 to use CPU only
266
- verbose=True, # For debugging
 
267
  )
268
 
269
  tools = [
@@ -280,7 +291,7 @@ class BasicAgent:
280
  Tool(
281
  name="File Reader from URL",
282
  func=file_reader_tool,
283
- description="Reads the content of a file from a URL and returns the first 5 rows as a string. It can handle Excel (.xlsx, .xls) and CSV (.csv) files. The URL must point directly to the file.",
284
  ),
285
  WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
286
  Tool(
@@ -309,12 +320,12 @@ class BasicAgent:
309
  description="Analyzes an image from a URL and returns a description of its content. The URL must point directly to an image file (e.g., .jpg, .png).",
310
  ),
311
  ]
312
- prompt_template_str = '''<|system|>
313
  You are a helpful and concise assistant. You have access to the following tools:
314
 
315
  {tools}
316
 
317
- To use a tool, respond with a JSON blob with "action" and "action_input" keys.
318
 
319
  If you have the final answer, respond with:
320
  FINAL ANSWER: [your answer]
@@ -348,7 +359,7 @@ FINAL ANSWER: 3<|end|>
348
 
349
  Question: {input}<|end|>
350
  <|assistant|>
351
- '''
352
  prompt = PromptTemplate.from_template(prompt_template_str).partial(
353
  tools=render_text_description(tools),
354
  tool_names=", ".join([t.name for t in tools]),
@@ -381,13 +392,13 @@ Question: {input}<|end|>
381
  def execute_tools(state):
382
  agent_action = state["agent_outcome"]
383
  tool_name = agent_action.tool
384
-
385
  tool_to_use = None
386
  for tool in tools:
387
  if tool.name == tool_name:
388
  tool_to_use = tool
389
  break
390
-
391
  if not tool_to_use:
392
  output = f"Error: Tool '{tool_name}' not found."
393
  else:
@@ -421,14 +432,16 @@ Question: {input}<|end|>
421
 
422
  def _parse_agent_output(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
423
  if "FINAL ANSWER:" in llm_output:
 
 
424
  return AgentFinish(
425
- return_values={"output": llm_output.split("FINAL ANSWER:")[-1].strip()},
426
  log=llm_output,
427
  )
428
 
429
  # Regex to find ```json ... ``` or raw JSON
430
- match = re.search(r'''(?:```json\n)?({.*?})(?:\n```)?''', llm_output, re.DOTALL)
431
-
432
  if match:
433
  json_str = match.group(1).strip()
434
  try:
@@ -443,7 +456,7 @@ Question: {input}<|end|>
443
  pass # Fall through to the next check if JSON is invalid
444
 
445
  # Regex to find python code
446
- match = re.search(r'''```(?:python\n)?(.*?)```''', llm_output, re.DOTALL)
447
  if match:
448
  code = match.group(1).strip()
449
  return AgentAction(
@@ -452,15 +465,18 @@ Question: {input}<|end|>
452
  log=llm_output,
453
  )
454
 
455
- return AgentFinish(return_values={"output": f"Could not parse LLM output: {llm_output}"}, log=llm_output)
 
 
 
456
 
457
  def __call__(self, question: str) -> str:
458
  print(f"Agent received question (first 50 chars): {question[:50]}...")
459
  try:
460
  result = self.agent_executor.invoke({"input": question, "chat_history": []})
461
- analysis = result['agent_outcome'].return_values["output"]
462
  print(f"Agent returning analysis: {analysis}")
463
  return analysis
464
  except Exception as e:
465
  print(f"Error during Langchain invocation: {e}")
466
- return f"Error analyzing question: {e}"
 
25
  from langgraph.graph import StateGraph, END, START
26
  from langgraph.prebuilt import ToolNode
27
  from typing import TypedDict, Annotated, List, Union
28
+ from langchain_core.messages import (
29
+ AnyMessage,
30
+ SystemMessage,
31
+ HumanMessage,
32
+ ToolMessage,
33
+ BaseMessage,
34
+ )
35
  import operator
36
  import json
37
  from langchain_community.llms import LlamaCpp
38
  from huggingface_hub import hf_hub_download
39
 
40
  # (Keep Constants as is)
41
+ # --- Constants ---
42
  default_api_url = "https://agents-course-unit4-scoring.hf.space"
43
 
44
 
 
66
  def file_reader_tool(url: str) -> str:
67
  """
68
  Reads the content of a file from a URL and returns the entire content as a string.
69
+ It can handle Excel (.xlsx, .xls) and CSV (.csv) files.
70
  The URL must point directly to the file.
71
  """
72
  try:
73
  headers = {
74
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
75
  }
76
  # Download the file content
77
  response = requests.get(url, headers=headers)
 
126
  ytt_api = YouTubeTranscriptApi()
127
  try:
128
  transcript_list = ytt_api.list(video_id)
129
+ transcript = transcript_list.find_transcript(["en"]).fetch()
130
  transcript_text = " ".join([item["text"] for item in transcript])
131
  return transcript_text
132
  except NoTranscriptFound:
 
142
  """
143
  try:
144
  headers = {
145
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
146
  }
147
  response = requests.get(url, headers=headers)
148
  response.raise_for_status()
 
231
  try:
232
  print(f"Analyzing image from URL: {url}")
233
  # Initialize the image captioning pipeline
234
+ captioner = pipeline(
235
+ "image-to-text", model="Salesforce/blip-image-captioning-large"
236
+ )
237
 
238
  # Analyze the image
239
  caption = captioner(url)
 
241
  except Exception as e:
242
  return f"Error analyzing image: {e}"
243
 
244
+
245
  class AgentState(TypedDict):
246
  input: str
247
  chat_history: list[BaseMessage]
248
  agent_outcome: Union[AgentAction, AgentFinish, None]
249
  intermediate_steps: Annotated[list[tuple[AgentAction, str]], operator.add]
250
 
251
+
252
  # --- Basic Agent Definition ---
253
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
254
  class BasicAgent:
 
271
 
272
  self.llm = LlamaCpp(
273
  model_path=model_path,
274
+ n_ctx=4096, # Context window size
275
+ n_gpu_layers=0, # Set to 0 to use CPU only
276
+ verbose=True, # For debugging
277
+ stop=["<|end|>"],
278
  )
279
 
280
  tools = [
 
291
  Tool(
292
  name="File Reader from URL",
293
  func=file_reader_tool,
294
+ description="Reads the content of a file from a URL and returns the entire content as a string. It can handle Excel (.xlsx, .xls) and CSV (.csv) files. The URL must point directly to the file.",
295
  ),
296
  WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
297
  Tool(
 
320
  description="Analyzes an image from a URL and returns a description of its content. The URL must point directly to an image file (e.g., .jpg, .png).",
321
  ),
322
  ]
323
+ prompt_template_str = """<|system|>
324
  You are a helpful and concise assistant. You have access to the following tools:
325
 
326
  {tools}
327
 
328
+ To use a tool, respond with a JSON blob with "action" and "action_input" keys, inside a ```json markdown block. For the Python Interpreter, you can use a ```python markdown block.
329
 
330
  If you have the final answer, respond with:
331
  FINAL ANSWER: [your answer]
 
359
 
360
  Question: {input}<|end|>
361
  <|assistant|>
362
+ """
363
  prompt = PromptTemplate.from_template(prompt_template_str).partial(
364
  tools=render_text_description(tools),
365
  tool_names=", ".join([t.name for t in tools]),
 
392
  def execute_tools(state):
393
  agent_action = state["agent_outcome"]
394
  tool_name = agent_action.tool
395
+
396
  tool_to_use = None
397
  for tool in tools:
398
  if tool.name == tool_name:
399
  tool_to_use = tool
400
  break
401
+
402
  if not tool_to_use:
403
  output = f"Error: Tool '{tool_name}' not found."
404
  else:
 
432
 
433
  def _parse_agent_output(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
434
  if "FINAL ANSWER:" in llm_output:
435
+ answer = llm_output.split("FINAL ANSWER:")[-1].strip()
436
+ answer = answer.split("\n")[0]
437
  return AgentFinish(
438
+ return_values={"output": answer},
439
  log=llm_output,
440
  )
441
 
442
  # Regex to find ```json ... ``` or raw JSON
443
+ match = re.search(r"""(?:```json\n)?({.*?})(?:\n```)?""", llm_output, re.DOTALL)
444
+
445
  if match:
446
  json_str = match.group(1).strip()
447
  try:
 
456
  pass # Fall through to the next check if JSON is invalid
457
 
458
  # Regex to find python code
459
+ match = re.search(r"""```(?:python\n)?(.*?)```""", llm_output, re.DOTALL)
460
  if match:
461
  code = match.group(1).strip()
462
  return AgentAction(
 
465
  log=llm_output,
466
  )
467
 
468
+ return AgentFinish(
469
+ return_values={"output": f"Could not parse LLM output: {llm_output}"},
470
+ log=llm_output,
471
+ )
472
 
473
  def __call__(self, question: str) -> str:
474
  print(f"Agent received question (first 50 chars): {question[:50]}...")
475
  try:
476
  result = self.agent_executor.invoke({"input": question, "chat_history": []})
477
+ analysis = result["agent_outcome"].return_values["output"]
478
  print(f"Agent returning analysis: {analysis}")
479
  return analysis
480
  except Exception as e:
481
  print(f"Error during Langchain invocation: {e}")
482
+ return f"Error analyzing question: {e}"