# LamiaYT's picture
# Deploy GAIA agent
# d07ba5a
# raw
# history blame
# 4.01 kB
# app.py
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, tool
from smolagents.models import LiteLLMModel # ✅ correct import
# --- Constants ---
# Base URL of the scoring service; run_and_submit_all() appends "/questions"
# and "/submit" to it to fetch the tasks and to post the answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Simple Web Search Tool ---
@tool
def simple_search(query: str) -> str:
    """
    Performs a DuckDuckGo search and returns the top 3 results.
    Args:
        query (str): The search query text.
    Returns:
        str: Titles and links of the top 3 search results, "No results found." when there are none, or a "Search error: ..." message if the search fails.
    """
    # A blank query cannot produce results; skip the network round-trip.
    if not query or not str(query).strip():
        return "No results found."
    try:
        # Imports stay inside the function (and inside the try) so that a
        # missing bs4 is reported to the agent as a search error, exactly
        # like any other failure, instead of raising out of the tool.
        from urllib.parse import parse_qs, urljoin, urlparse

        from bs4 import BeautifulSoup

        resp = requests.get(
            "https://html.duckduckgo.com/html/",
            params={"q": query},
            # The endpoint commonly rejects the default python-requests
            # User-Agent, so send a browser-like one.
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
                )
            },
            timeout=10,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        results = []
        # "[href]" filters out anchors without a link, so indexing below
        # cannot raise KeyError and discard the whole result list.
        for anchor in soup.select("a.result__a[href]")[:3]:
            # Result links may be protocol-relative ("//duckduckgo.com/l/?...");
            # resolve them against the response URL to get an absolute link.
            link = urljoin(resp.url, anchor["href"])
            parsed = urlparse(link)
            # DuckDuckGo wraps targets in a redirect whose "uddg" query
            # parameter carries the real URL; unwrap it when present.
            if parsed.netloc.endswith("duckduckgo.com") and parsed.path.rstrip("/") == "/l":
                target = parse_qs(parsed.query).get("uddg")
                if target:
                    link = target[0]
            results.append(f"{anchor.get_text()}\n{link}")
        return "\n\n".join(results) or "No results found."
    except Exception as e:
        # Best-effort tool: report the failure as text so the agent can react
        # to it rather than aborting its run.
        return f"Search error: {e}"
# --- Enhanced Agent using Light Model ---
class BasicAgent:
    """Callable wrapper around a ``CodeAgent`` that answers a single question.

    The agent is backed by a ``LiteLLMModel`` and has the ``simple_search``
    tool at its disposal. Calling the instance never raises: failures are
    returned as an ``"Agent error: ..."`` string.
    """

    def __init__(self):
        """Build the LiteLLM-backed model and the code agent that uses it."""
        print("BasicAgent initialized with LiteLLMModel (falcon-7b-instruct).")
        self.model = LiteLLMModel(
            # LiteLLM selects the backend from the "<provider>/" prefix of the
            # model id; a bare Hub repo id ("tiiuae/...") has no recognised
            # provider and makes every completion call fail.
            model_id="huggingface/tiiuae/falcon-7b-instruct",
            max_tokens=512,
            temperature=0.1,
        )
        self.agent = CodeAgent(
            model=self.model,
            tools=[simple_search],
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The task text handed to the agent.

        Returns:
            The agent's final answer converted to ``str`` (empty string when
            the agent produced ``None``), or ``"Agent error: <reason>"`` if
            the run raised.
        """
        print(f"Question: {question[:60]}...")
        try:
            result = self.agent.run(question)
        except Exception as e:
            return f"Agent error: {e}"
        # The agent may hand back any Python object (number, list, ...);
        # normalise to text so the declared return type holds.
        return "" if result is None else str(result)
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message for
        the UI. ``table`` is a ``pandas.DataFrame`` with one row per answered
        question, or ``None`` when the run stops before any question is
        processed (not logged in, agent construction failed, or the questions
        could not be fetched).
    """
    if not profile:
        return "Please log in to Hugging Face to submit answers.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent initialization failed: {e}", None

    # Link to this Space's code, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        r = requests.get(questions_url, timeout=15)
        r.raise_for_status()
        questions = r.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Anything other than a list (e.g. a JSON error object) would otherwise
    # blow up inside the loop with an AttributeError on ``item.get``.
    if not isinstance(questions, list):
        return (
            "Error fetching questions: expected a JSON list, "
            f"got {type(questions).__name__}",
            None,
        )

    logs, answers = [], []
    for item in questions:
        # Skip malformed entries instead of crashing the whole run.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue
        ans = agent(question)
        answers.append({"task_id": task_id, "submitted_answer": ans})
        logs.append({"Task ID": task_id, "Question": question, "Submitted Answer": ans})

    if not answers:
        return "Agent produced no answers.", pd.DataFrame(logs)

    payload = {"username": username, "agent_code": agent_code, "answers": answers}
    try:
        resp = requests.post(submit_url, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"✅ Submission Successful!\n"
            f"Score: {data.get('score','N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
            f"{data.get('message','')}"
        )
        return status, pd.DataFrame(logs)
    except Exception as e:
        # Still return the per-question log so the answers remain visible
        # even though the submission itself did not go through.
        return f"Submission failed: {e}", pd.DataFrame(logs)
# --- Gradio Interface ---
with gr.Blocks() as demo:
    # Page heading and Hugging Face login control.
    gr.Markdown(value="# GAIA Agent Evaluation Runner")
    gr.LoginButton()

    # Trigger plus the two widgets that display the outcome of a run.
    run_button = gr.Button(value="Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(
        label="Status / Submission Result",
        lines=5,
        interactive=False,
    )
    result_table = gr.DataFrame(
        label="Questions & Agent Answers",
        wrap=True,
    )

    # No explicit inputs are wired: the handler's only parameter is the OAuth
    # profile, which Gradio supplies based on its type annotation.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_box, result_table],
    )

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(debug=True, share=False)