# app.py
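"""GAIA Agent Evaluation Runner.

Fetches the evaluation questions from the Agents Course scoring API, answers
them with a smolagents CodeAgent backed by a LiteLLM model, and submits the
results through a Gradio interface.
"""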

import os
import gradio as gr
import requests
import pandas as pd

from smolagents import CodeAgent, LiteLLMModel, tool
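# Additional runtime dependencies implied below: litellm (used by LiteLLMModel)
# and beautifulsoup4 (imported inside simple_search).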

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Simple Web Search Tool ---
@tool
def simple_search(query: str) -> str:
    """
    Args:
        query (str): Search query text.
    Returns:
        str: Top 3 DuckDuckGo search result titles & links.
    """
    try:
        resp = requests.get(
            "https://html.duckduckgo.com/html/",
            params={"q": query},
            timeout=10
        )
        resp.raise_for_status()
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(resp.text, "html.parser")
        items = soup.select("a.result__a")[:3]
        return "\n\n".join(f"{a.get_text()}\n{a['href']}" for a in items) or "No results found."
    except Exception as e:
        return f"Search error: {e}"

# --- Enhanced Agent using Light Model ---
class BasicAgent:
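    """Minimal agent: a smolagents CodeAgent with a single web-search tool."""
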
    def __init__(self):
        print("BasicAgent initialized with LiteLLMModel (falcon-7b-instruct).")
        # LiteLLM routes Hugging Face models via the "huggingface/" provider prefix;
        # a Hugging Face API token in the environment is likely needed for the hosted
        # Inference API (assumption about the deployment setup).
        self.model = LiteLLMModel(
            model_id="huggingface/tiiuae/falcon-7b-instruct",
            max_tokens=512,
            temperature=0.1
        )
        self.agent = CodeAgent(
            model=self.model,
            tools=[simple_search]
        )

    def __call__(self, question: str) -> str:
        print(f"Question: {question[:60]}...")
        try:
            return self.agent.run(question)
        except Exception as e:
            return f"Agent error: {e}"

def run_and_submit_all(profile: gr.OAuthProfile | None):
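    """Fetch all evaluation questions, run BasicAgent on each, and submit the answers."""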
    if not profile:
        return "Please log in to Hugging Face to submit answers.", None
    username = profile.username
    space_id = os.getenv("SPACE_ID", "")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent initialization failed: {e}", None

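    # Link to this Space's source, sent with the submission so the agent code can be verified.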
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        r = requests.get(questions_url, timeout=15)
        r.raise_for_status()
        questions = r.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    logs, answers = [], []
    for item in questions:
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue
        ans = agent(question)
        answers.append({"task_id": task_id, "submitted_answer": ans})
        logs.append({"Task ID": task_id, "Question": question, "Submitted Answer": ans})

    if not answers:
        return "Agent produced no answers.", pd.DataFrame(logs)

    payload = {"username": username, "agent_code": agent_code, "answers": answers}
    try:
        resp = requests.post(submit_url, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"✅ Submission Successful!\n"
            f"Score: {data.get('score','N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
            f"{data.get('message','')}"
        )
        return status, pd.DataFrame(logs)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(logs)

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
    result_table = gr.DataFrame(label="Questions & Agent Answers", wrap=True)

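    # No inputs are wired up: Gradio fills the gr.OAuthProfile parameter
    # automatically for the logged-in user.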
    run_button.click(run_and_submit_all, outputs=[status_box, result_table])

if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(debug=True, share=False)