Renzo committed on
Commit
c42da51
·
1 Parent(s): 65822ef

wip: add a tool to convert audio to text, and improve file fetching with an option to save to a temp location

Browse files
Files changed (4) hide show
  1. agent.py +2 -1
  2. requirements.txt +2 -1
  3. tools.py +27 -13
  4. utils.py +32 -0
agent.py CHANGED
@@ -11,7 +11,8 @@ model = {
11
  "grok": xAI(id="grok-3-beta"),
12
  "gpt": OpenAIChat(id="gpt-4.1-nano"),
13
  "gemini": Gemini(id="gemini-2.5-pro-preview-03-25"),
14
- "open_router": OpenRouter(id="qwen/qwen3-235b-a22b")
 
15
  }
16
 
17
 
 
11
  "grok": xAI(id="grok-3-beta"),
12
  "gpt": OpenAIChat(id="gpt-4.1-nano"),
13
  "gemini": Gemini(id="gemini-2.5-pro-preview-03-25"),
14
+ # "open_router": OpenRouter(id="qwen/qwen3-235b-a22b")
15
+ "open_router": OpenRouter(id="google/gemini-2.5-pro-exp-03-25")
16
  }
17
 
18
 
requirements.txt CHANGED
@@ -6,4 +6,5 @@ duckduckgo-search
6
  wikipedia
7
  google
8
  google-genai
9
- httpx
 
 
6
  wikipedia
7
  google
8
  google-genai
9
+ httpx
10
+ openai-whisper
tools.py CHANGED
@@ -1,14 +1,13 @@
1
  import os
2
 
3
- import httpx
4
  from agno.tools.duckduckgo import DuckDuckGoTools
5
  from agno.tools.reasoning import ReasoningTools
 
6
  from agno.tools.wikipedia import WikipediaTools
7
 
8
  BASE_STORAGE_ROOT = os.getenv("AGENT_STORAGE_ROOT", os.path.join(os.getcwd(), "agent_storage"))
9
 
10
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
-
12
 
13
  def get_file_from_task_id(task_id: str) -> str:
14
  """
@@ -18,28 +17,24 @@ def get_file_from_task_id(task_id: str) -> str:
18
  task_id (str): Identifier that points to the remote file.
19
 
20
  Returns:
21
- str: task_id to be used by other tools to read the file
22
  """
23
- # ensure storage directory exists
24
  task_dir = os.path.join(BASE_STORAGE_ROOT, task_id)
25
  os.makedirs(task_dir, exist_ok=True)
26
 
27
- # filename derived from task_id
28
  filename = task_id
29
  file_path = os.path.join(task_dir, filename)
30
 
31
- # if file already exists, return
32
  if os.path.exists(file_path):
33
  print("[INFO] Using cached file:", file_path)
34
  return file_path
35
 
36
- # fetch content from remote
37
- response = httpx.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15, follow_redirects=True)
38
- response.raise_for_status()
39
 
40
- # write content to file
41
  with open(file_path, "wb") as f:
42
- f.write(response.content)
43
 
44
  return file_path
45
 
@@ -60,10 +55,29 @@ def read_file_from_task_id(task_id: str) -> str:
60
  return f.read()
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  tools = [
64
  ReasoningTools(think=True, add_few_shot=True),
65
  DuckDuckGoTools(fixed_max_results=5),
66
  WikipediaTools(),
67
  get_file_from_task_id,
68
- read_file_from_task_id
 
69
  ]
 
1
  import os
2
 
3
+ import whisper
4
  from agno.tools.duckduckgo import DuckDuckGoTools
5
  from agno.tools.reasoning import ReasoningTools
6
+ from utils import fetch_file_content
7
  from agno.tools.wikipedia import WikipediaTools
8
 
9
  BASE_STORAGE_ROOT = os.getenv("AGENT_STORAGE_ROOT", os.path.join(os.getcwd(), "agent_storage"))
10
 
 
 
11
 
12
  def get_file_from_task_id(task_id: str) -> str:
13
  """
 
17
  task_id (str): Identifier that points to the remote file.
18
 
19
  Returns:
20
+ str: Path to the downloaded file.
21
  """
 
22
  task_dir = os.path.join(BASE_STORAGE_ROOT, task_id)
23
  os.makedirs(task_dir, exist_ok=True)
24
 
 
25
  filename = task_id
26
  file_path = os.path.join(task_dir, filename)
27
 
 
28
  if os.path.exists(file_path):
29
  print("[INFO] Using cached file:", file_path)
30
  return file_path
31
 
32
+ # Use the utility function to fetch content
33
+ result = fetch_file_content(task_id)
34
+ content = result["content"]
35
 
 
36
  with open(file_path, "wb") as f:
37
+ f.write(content)
38
 
39
  return file_path
40
 
 
55
  return f.read()
56
 
57
 
58
def convert_audio_to_text(task_id: str) -> str:
    """
    Use this to download an audio file and convert it to text.

    The audio is fetched into a temporary file, transcribed with the
    Whisper "turbo" model, and the temporary file is removed afterwards.

    Args:
        task_id (str): Identifier that points to the remote file.

    Returns:
        str: the transcript of the audio in text
    """
    result = fetch_file_content(task_id, temp=True)
    audio_path = result["path"]

    try:
        model = whisper.load_model("turbo")
        result_whisper = model.transcribe(audio=audio_path)
    finally:
        # The download helper is asked for a temp file that is not deleted
        # automatically, so it has to be removed here or every call leaks a
        # file. Cleanup is best-effort: it must never mask the transcript
        # or the original transcription error.
        try:
            os.remove(audio_path)
        except (OSError, TypeError):
            pass

    transcript = result_whisper["text"]
    print("[convert_audio_to_text]", transcript)
    return transcript
74
+
75
+
76
  tools = [
77
  ReasoningTools(think=True, add_few_shot=True),
78
  DuckDuckGoTools(fixed_max_results=5),
79
  WikipediaTools(),
80
  get_file_from_task_id,
81
+ read_file_from_task_id,
82
+ convert_audio_to_text
83
  ]
utils.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ import tempfile
3
+ import mimetypes
4
+ import os
5
+
6
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
7
+
8
+
9
def fetch_file_content(task_id: str, temp: bool = False) -> dict:
    """
    Download the file content for the given task_id.

    Args:
        task_id (str): Identifier that points to the remote file.
        temp (bool): When True, the bytes are also written to a temporary
            file. That file is created with delete=False, so the caller is
            responsible for removing it.

    Returns:
        dict: A dict with:
            - content: bytes of the file
            - path: filesystem path to the temp file if temp=True, else None

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    resp = httpx.get(url, timeout=15, follow_redirects=True)
    resp.raise_for_status()
    content = resp.content

    result = {"content": content, "path": None}

    if temp:
        # Drop header parameters such as "; charset=utf-8": guess_extension
        # only recognises the bare "type/subtype" form.
        ctype = resp.headers.get("content-type", "").split(";", 1)[0].strip()
        ext = mimetypes.guess_extension(ctype) or os.path.splitext(task_id)[1] or ""
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp.write(content)
            # Use the wrapper's own .name: the wrapped file object's name
            # can be an integer file descriptor rather than the path.
            result["path"] = tmp.name

    # Log a summary only; printing the dict itself would dump the whole
    # binary payload to stdout.
    print("[fetch_file_content]", {"bytes": len(content), "path": result["path"]})
    return result