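# Hugging Face file-view header (page residue, kept as comments so the file parses):
# uploader: LamiaYT | commit: f96a820 "Deploy GAIA agent" | views: raw / history / blame | size: 3.89 kB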
# app.py
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, tool
from litellm import LiteLLMModel
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Tool: simple DuckDuckGo search via HTML scraping ---
@tool
def simple_search(query: str) -> str:
    """
    Perform a basic DuckDuckGo search and return the top three results.

    Args:
        query: Search query.

    Returns:
        str: Up to three results separated by blank lines, each formatted as
        the result title followed by its URL on the next line. Returns
        "No results found." when nothing matched and "Search error: ..." when
        the request or the parsing failed.
    """
    from urllib.parse import parse_qs, urlparse

    try:
        resp = requests.get(
            "https://html.duckduckgo.com/html/",
            params={"q": query},
            timeout=10,
        )
        resp.raise_for_status()
        # Imported lazily so a missing bs4 is reported as a search error
        # instead of breaking the import of the whole app.
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(resp.text, "html.parser")
        results = []
        # Only anchors that actually carry an href are useful as results.
        for anchor in soup.select("a.result__a[href]")[:3]:
            href = anchor["href"]
            # Result links are redirects of the form
            # //duckduckgo.com/l/?uddg=<url-encoded target>; hand the agent
            # the real target URL when that parameter is present.
            target = parse_qs(urlparse(href).query).get("uddg")
            if target:
                href = target[0]
            results.append(f"{anchor.get_text().strip()}\n{href}")
        return "\n\n".join(results) if results else "No results found."
    except Exception as e:
        # Best effort: the agent receives the failure as text, not an exception.
        return f"Search error: {e}"
# --- Enhanced Agent using a public model ---
class BasicAgent:
    """
    Callable question-answering agent.

    Wraps a ``CodeAgent`` that is driven by a ``LiteLLMModel`` and equipped
    with the ``simple_search`` tool. Calling the instance with a question
    returns the agent's answer as a string.
    """

    def __init__(self):
        """Create the language model and the code agent that uses it."""
        print("BasicAgent initialized with Falcon 7B Instruct model.")
        # NOTE(review): LiteLLM model ids usually carry a provider prefix
        # (e.g. "huggingface/..."); confirm this bare id resolves as intended.
        # NOTE(review): LiteLLMModel is normally exported by smolagents, not
        # litellm -- confirm the module-level import.
        self.model = LiteLLMModel(
            model_id="tiiuae/falcon-7b-instruct",
            max_tokens=512,
            temperature=0.1,
        )
        self.agent = CodeAgent(
            model=self.model,
            tools=[simple_search],
        )

    def __call__(self, question: str) -> str:
        """
        Run the agent on one question.

        Args:
            question: The question text to answer.

        Returns:
            The agent's final answer converted to ``str``, or
            ``"Agent error: ..."`` if the run raised an exception.
        """
        print(f"Received question: {question[:50]}...")
        try:
            # The agent's final answer is not guaranteed to be a string
            # (it can be a number, list, ...); normalise it so the declared
            # return type holds and the value is always JSON-serialisable.
            return str(self.agent.run(question))
        except Exception as e:
            return f"Agent error: {e}"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch the evaluation questions, answer each one, and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user
            is not logged in.

    Returns:
        A ``(status_message, table)`` tuple. ``table`` is a DataFrame with
        one row per answered question, or None when the run stopped before
        any question was processed.
    """
    hf_space = os.getenv("SPACE_ID")

    # Nothing can be submitted without a username.
    if not profile:
        return "Please log in to Hugging Face to submit.", None

    user = profile.username
    questions_endpoint = DEFAULT_API_URL + "/questions"
    submit_endpoint = DEFAULT_API_URL + "/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent init failed: {e}", None

    # Link to this Space's code, sent along with the answers.
    code_link = f"https://huggingface.co/spaces/{hf_space}/tree/main"

    try:
        questions_resp = requests.get(questions_endpoint, timeout=15)
        questions_resp.raise_for_status()
        questions = questions_resp.json()
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    table_rows = []
    answers = []
    for entry in questions:
        task_id = entry.get("task_id")
        question = entry.get("question")
        # Entries lacking a task id or a question are skipped.
        if task_id and question is not None:
            answer = agent(question)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            table_rows.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": answer,
                }
            )

    if not answers:
        return "No answers generated.", pd.DataFrame(table_rows)

    submission = {
        "username": user,
        "agent_code": code_link,
        "answers": answers,
    }
    try:
        submit_resp = requests.post(submit_endpoint, json=submission, timeout=60)
        submit_resp.raise_for_status()
        data = submit_resp.json()
        score = data.get("score", "N/A")
        correct = data.get("correct_count", "?")
        attempted = data.get("total_attempted", "?")
        message = data.get("message", "")
        summary = (
            "✅ Submission Successful!\n"
            f"Score: {score}% "
            f"({correct}/{attempted})\n"
            f"{message}"
        )
        return summary, pd.DataFrame(table_rows)
    except Exception as e:
        # The per-question table is still returned so the answers stay visible.
        return f"Submit failed: {e}", pd.DataFrame(table_rows)
# --- Gradio UI ---
# Page layout: title, login button, run button, then the two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(
        label="Status / Submission Result",
        interactive=False,
        lines=5,
    )
    results = gr.DataFrame(
        wrap=True,
        label="Questions & Submitted Answers",
    )
    # No explicit inputs are wired; the handler's `profile` argument is
    # expected to be supplied by Gradio from the login state.
    run_btn.click(fn=run_and_submit_all, outputs=[status, results])
# Start the app only when this file is executed directly.
if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(share=False, debug=True)