LamiaYT committed on
Commit
9ceeea7
·
1 Parent(s): 003104b
Files changed (2) hide show
  1. app.py +3 -3
  2. lang.txt +37 -56
app.py CHANGED
@@ -4,6 +4,7 @@ import requests
4
  import inspect
5
  import pandas as pd
6
  from agent import build_graph
 
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
@@ -124,7 +125,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
124
  error_json = e.response.json()
125
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
126
  except requests.exceptions.JSONDecodeError:
127
- error_detail += f" Response: {e.response.text[:500]}"
128
  status_message = f"Submission Failed: {error_detail}"
129
  print(status_message)
130
  results_df = pd.DataFrame(results_log)
@@ -197,5 +198,4 @@ if __name__ == "__main__":
197
  print("-"*(60 + len(" App Starting ")) + "\n")
198
 
199
  print("Launching Gradio Interface for Basic Agent Evaluation...")
200
- demo.launch(debug=True, share=False)
201
-
 
4
  import inspect
5
  import pandas as pd
6
  from agent import build_graph
7
+ from langchain_core.messages import HumanMessage # Add this import
8
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
 
125
  error_json = e.response.json()
126
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
127
  except requests.exceptions.JSONDecodeError:
128
+ error_detail += f" Response: {e.response.text[:500]}"
129
  status_message = f"Submission Failed: {error_detail}"
130
  print(status_message)
131
  results_df = pd.DataFrame(results_log)
 
198
  print("-"*(60 + len(" App Starting ")) + "\n")
199
 
200
  print("Launching Gradio Interface for Basic Agent Evaluation...")
201
+ demo.launch(debug=True, share=False)
 
lang.txt CHANGED
@@ -199,130 +199,111 @@ if __name__ == "__main__":
199
 
200
  print("Launching Gradio Interface for Basic Agent Evaluation...")
201
  demo.launch(debug=True, share=False)
 
202
  Final_Assignment_Template\agent.py:
203
  import os
 
204
  from dotenv import load_dotenv
 
205
 
206
- # Load environment variables
207
  load_dotenv()
208
-
209
- # Set protobuf implementation to avoid C++ extension issues
210
  os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
211
-
212
- # Load keys from environment
213
- groq_api_key = os.getenv("GROQ_API_KEY")
214
- serper_api_key = os.getenv("SERPER_API_KEY")
215
  hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
216
 
217
- # ---- Imports ----
218
  from langgraph.graph import START, StateGraph, MessagesState
219
  from langgraph.prebuilt import tools_condition, ToolNode
220
- from langchain_google_genai import ChatGoogleGenerativeAI
221
- from langchain_groq import ChatGroq
222
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
 
223
  from langchain_community.tools.tavily_search import TavilySearchResults
224
  from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
225
  from langchain_community.vectorstores import Chroma
226
- from langchain_core.documents import Document
227
  from langchain_core.messages import SystemMessage, HumanMessage
228
  from langchain_core.tools import tool
229
- from langchain.tools.retriever import create_retriever_tool
230
- from langchain.vectorstores import Chroma
231
- from langchain.embeddings import HuggingFaceEmbeddings
232
  from langchain.schema import Document
233
- import json
234
 
235
- # ---- Tools ----
236
 
237
  @tool
238
  def multiply(a: int, b: int) -> int:
 
239
  return a * b
240
 
241
  @tool
242
  def add(a: int, b: int) -> int:
 
243
  return a + b
244
 
245
  @tool
246
  def subtract(a: int, b: int) -> int:
 
247
  return a - b
248
 
249
  @tool
250
  def divide(a: int, b: int) -> float:
 
251
  if b == 0:
252
  raise ValueError("Cannot divide by zero.")
253
  return a / b
254
 
255
  @tool
256
  def modulus(a: int, b: int) -> int:
 
257
  return a % b
258
 
259
  @tool
260
  def wiki_search(query: str) -> str:
 
261
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
262
  formatted = "\n\n---\n\n".join(
263
- [
264
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
265
- for doc in search_docs
266
- ]
267
  )
268
  return {"wiki_results": formatted}
269
 
270
  @tool
271
  def web_search(query: str) -> str:
 
272
  search_docs = TavilySearchResults(max_results=3).invoke(query=query)
273
  formatted = "\n\n---\n\n".join(
274
- [
275
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
276
- for doc in search_docs
277
- ]
278
  )
279
  return {"web_results": formatted}
280
 
281
  @tool
282
  def arvix_search(query: str) -> str:
 
283
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
284
  formatted = "\n\n---\n\n".join(
285
- [
286
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
287
- for doc in search_docs
288
- ]
289
  )
290
  return {"arvix_results": formatted}
291
 
292
- # ---- Embedding & Vector Store Setup ----
293
-
294
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
295
-
296
- json_QA = []
297
- with open('metadata.jsonl', 'r') as jsonl_file:
298
- for line in jsonl_file:
299
- json_QA.append(json.loads(line))
300
-
301
  documents = [
302
  Document(
303
  page_content=f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}",
304
  metadata={"source": sample["task_id"]}
305
- )
306
- for sample in json_QA
307
  ]
308
-
309
  vector_store = Chroma.from_documents(
310
  documents=documents,
311
  embedding=embeddings,
312
  persist_directory="./chroma_db",
313
  collection_name="my_collection"
314
  )
315
- vector_store.persist()
316
  print("Documents inserted:", vector_store._collection.count())
317
 
318
  @tool
319
  def similar_question_search(query: str) -> str:
 
320
  matched_docs = vector_store.similarity_search(query, 3)
321
  formatted = "\n\n---\n\n".join(
322
- [
323
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
324
- for doc in matched_docs
325
- ]
326
  )
327
  return {"similar_questions": formatted}
328
 
@@ -334,29 +315,28 @@ Now, I will ask you a question. Report your thoughts, and finish your answer wit
334
  FINAL ANSWER: [YOUR FINAL ANSWER].
335
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings...
336
  """
337
-
338
  sys_msg = SystemMessage(content=system_prompt)
339
 
340
- # ---- Tool List ----
341
-
342
  tools = [
343
  multiply, add, subtract, divide, modulus,
344
  wiki_search, web_search, arvix_search, similar_question_search
345
  ]
346
 
347
- # ---- Graph Definition ----
348
 
349
- def build_graph(provider: str = "groq"):
350
- if provider == "groq":
351
- llm = ChatGroq(model="qwen-qwq-32b", temperature=0, api_key=groq_api_key)
352
- elif provider == "google":
353
- llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
354
- elif provider == "huggingface":
355
  llm = ChatHuggingFace(
356
- llm=HuggingFaceEndpoint(repo_id="mosaicml/mpt-30b", temperature=0)
 
 
 
 
357
  )
 
 
358
  else:
359
- raise ValueError("Invalid provider: choose 'groq', 'google', or 'huggingface'.")
360
 
361
  llm_with_tools = llm.bind_tools(tools)
362
 
@@ -380,6 +360,7 @@ def build_graph(provider: str = "groq"):
380
  builder.add_edge("tools", "assistant")
381
 
382
  return builder.compile()
 
383
  Final_Assignment_Template\metadata.jsonl:
384
 
385
  Final_Assignment_Template\requirements.txt:
 
199
 
200
  print("Launching Gradio Interface for Basic Agent Evaluation...")
201
  demo.launch(debug=True, share=False)
202
+
203
  Final_Assignment_Template\agent.py:
204
  import os
205
+ import json
206
  from dotenv import load_dotenv
207
+ from langchain_core.messages import HumanMessage
208
 
 
209
  load_dotenv()
 
 
210
  os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
 
 
 
 
211
  hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
212
 
 
213
  from langgraph.graph import START, StateGraph, MessagesState
214
  from langgraph.prebuilt import tools_condition, ToolNode
 
 
215
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
216
+ from langchain_google_genai import ChatGoogleGenerativeAI
217
  from langchain_community.tools.tavily_search import TavilySearchResults
218
  from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
219
  from langchain_community.vectorstores import Chroma
 
220
  from langchain_core.messages import SystemMessage, HumanMessage
221
  from langchain_core.tools import tool
 
 
 
222
  from langchain.schema import Document
 
223
 
224
+ # ---- Tool Definitions (with docstrings) ----
225
 
226
  @tool
227
  def multiply(a: int, b: int) -> int:
228
+ """Multiply two integers and return the result."""
229
  return a * b
230
 
231
  @tool
232
  def add(a: int, b: int) -> int:
233
+ """Add two integers and return the result."""
234
  return a + b
235
 
236
  @tool
237
  def subtract(a: int, b: int) -> int:
238
+ """Subtract second integer from the first and return the result."""
239
  return a - b
240
 
241
  @tool
242
  def divide(a: int, b: int) -> float:
243
+ """Divide first integer by second and return the result as a float."""
244
  if b == 0:
245
  raise ValueError("Cannot divide by zero.")
246
  return a / b
247
 
248
  @tool
249
  def modulus(a: int, b: int) -> int:
250
+ """Return the remainder when first integer is divided by second."""
251
  return a % b
252
 
253
  @tool
254
  def wiki_search(query: str) -> str:
255
+ """Search Wikipedia for the query and return text of up to 2 documents."""
256
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
257
  formatted = "\n\n---\n\n".join(
258
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
259
+ for doc in search_docs
 
 
260
  )
261
  return {"wiki_results": formatted}
262
 
263
  @tool
264
  def web_search(query: str) -> str:
265
+ """Search the web for the query using Tavily and return up to 3 results."""
266
  search_docs = TavilySearchResults(max_results=3).invoke(query=query)
267
  formatted = "\n\n---\n\n".join(
268
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
269
+ for doc in search_docs
 
 
270
  )
271
  return {"web_results": formatted}
272
 
273
  @tool
274
  def arvix_search(query: str) -> str:
275
+ """Search Arxiv for the query and return content from up to 3 papers."""
276
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
277
  formatted = "\n\n---\n\n".join(
278
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
279
+ for doc in search_docs
 
 
280
  )
281
  return {"arvix_results": formatted}
282
 
283
+ # Build vector store once
 
284
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
285
+ json_QA = [json.loads(line) for line in open("metadata.jsonl", "r")]
 
 
 
 
 
286
  documents = [
287
  Document(
288
  page_content=f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}",
289
  metadata={"source": sample["task_id"]}
290
+ ) for sample in json_QA
 
291
  ]
 
292
  vector_store = Chroma.from_documents(
293
  documents=documents,
294
  embedding=embeddings,
295
  persist_directory="./chroma_db",
296
  collection_name="my_collection"
297
  )
 
298
  print("Documents inserted:", vector_store._collection.count())
299
 
300
  @tool
301
  def similar_question_search(query: str) -> str:
302
+ """Search for questions similar to the input query using the vector store."""
303
  matched_docs = vector_store.similarity_search(query, 3)
304
  formatted = "\n\n---\n\n".join(
305
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
306
+ for doc in matched_docs
 
 
307
  )
308
  return {"similar_questions": formatted}
309
 
 
315
  FINAL ANSWER: [YOUR FINAL ANSWER].
316
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings...
317
  """
 
318
  sys_msg = SystemMessage(content=system_prompt)
319
 
 
 
320
  tools = [
321
  multiply, add, subtract, divide, modulus,
322
  wiki_search, web_search, arvix_search, similar_question_search
323
  ]
324
 
325
+ # ---- Graph Builder ----
326
 
327
+ def build_graph(provider: str = "huggingface"):
328
+ if provider == "huggingface":
 
 
 
 
329
  llm = ChatHuggingFace(
330
+ llm=HuggingFaceEndpoint(
331
+ repo_id="mosaicml/mpt-30b",
332
+ temperature=0,
333
+ huggingfacehub_api_token=hf_token
334
+ )
335
  )
336
+ elif provider == "google":
337
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
338
  else:
339
+ raise ValueError("Invalid provider: choose 'huggingface' or 'google'.")
340
 
341
  llm_with_tools = llm.bind_tools(tools)
342
 
 
360
  builder.add_edge("tools", "assistant")
361
 
362
  return builder.compile()
363
+
364
  Final_Assignment_Template\metadata.jsonl:
365
 
366
  Final_Assignment_Template\requirements.txt: