File size: 3,891 Bytes
a39e119
 
574b6ca
 
 
 
7963312
f96a820
 
757ebd9
e80aab9
3db6293
e80aab9
f96a820
70fa272
 
 
f96a820
70fa272
 
f96a820
70fa272
 
f96a820
70fa272
 
 
 
 
 
 
 
f96a820
70fa272
f96a820
 
 
70fa272
 
 
f96a820
a39e119
7963312
f96a820
 
 
 
 
 
a39e119
 
70fa272
a39e119
7963312
a39e119
f96a820
7963312
70fa272
7963312
f96a820
7963312
 
 
70fa272
f96a820
70fa272
f96a820
a39e119
 
8f6825e
f96a820
 
31243f4
f96a820
757ebd9
36ed51a
f96a820
eccf8e4
f96a820
 
 
a39e119
f96a820
70fa272
 
 
f96a820
 
 
 
70fa272
 
 
31243f4
70fa272
 
7963312
70fa272
e80aab9
f96a820
 
 
70fa272
f96a820
 
 
 
7963312
70fa272
7963312
f96a820
7963312
a39e119
7963312
f96a820
7963312
f96a820
 
 
 
70fa272
e80aab9
 
f96a820
a39e119
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# app.py

import os
import gradio as gr
import requests
import pandas as pd

from smolagents import CodeAgent, tool
from litellm import LiteLLMModel

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Tool: simple DuckDuckGo search via HTML scraping ---
@tool
def simple_search(query: str) -> str:
    """
    Perform a basic DuckDuckGo search and return the top hits as plain text.

    Args:
        query (str): Search query.

    Returns:
        str: Up to 3 results, each written as the link title on one line and
            its URL on the next, with a blank line between results. Returns
            "No results found." when no usable link was parsed, and a
            "Search error" message when the request or the parsing failed.
    """
    try:
        # Imported lazily (as in the original) so a missing bs4 install is
        # reported through the same "Search error" string as other failures.
        from bs4 import BeautifulSoup

        resp = requests.get(
            "https://html.duckduckgo.com/html/",
            params={"q": query},
            timeout=10,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        results = []
        for anchor in soup.select("a.result__a"):
            href = anchor.get("href")
            if not href:
                # An anchor without a target is useless to the caller and
                # must not abort the whole search with a KeyError.
                continue
            results.append(f"{anchor.get_text()}\n{href}")
            if len(results) == 3:
                break
        return "\n\n".join(results) if results else "No results found."
    except Exception as e:
        # Best-effort tool: failures are reported as text, never raised.
        return f"Search error: {e}"

# --- Enhanced Agent using a public model ---
class BasicAgent:
    """Callable question-answering agent backed by a ``CodeAgent``.

    Construction builds a ``LiteLLMModel`` and a ``CodeAgent`` that has the
    ``simple_search`` tool available. Calling the instance with a question
    runs the agent and always returns a string, so the result can be placed
    directly into the submission payload.
    """

    def __init__(self):
        print("BasicAgent initialized with Falcon 7B Instruct model.")
        # NOTE(review): LiteLLM model ids usually carry a provider prefix
        # (e.g. "huggingface/..."); confirm that this bare id resolves.
        # NOTE(review): this file imports LiteLLMModel from `litellm`;
        # smolagents documents a class of that name - confirm the import.
        self.model = LiteLLMModel(
            model_id="tiiuae/falcon-7b-instruct",
            max_tokens=512,
            temperature=0.1,
        )
        self.agent = CodeAgent(
            model=self.model,
            tools=[simple_search],
        )

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the answer as text.

        Args:
            question: The question to hand to the agent.

        Returns:
            The agent's final answer converted to ``str`` (an empty string
            when the agent produced ``None``), or "Agent error: <details>"
            when the run raised an exception.
        """
        print(f"Received question: {question[:50]}...")
        try:
            result = self.agent.run(question)
        except Exception as e:
            # One failing question must not abort the caller's whole batch.
            return f"Agent error: {e}"
        # The agent may hand back any object (number, list, ...); the
        # declared contract of this method is a plain string.
        if result is None:
            return ""
        return str(result)

def run_and_submit_all(
    profile: gr.OAuthProfile | None,
) -> tuple[str, pd.DataFrame | None]:
    """Fetch the questions, answer each with ``BasicAgent`` and submit them.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a DataFrame with one row per answered question, or
        ``None`` when the run stopped before any question was processed
        (not logged in, agent construction failed, questions unavailable).
    """
    if not profile:
        return "Please log in to Hugging Face to submit.", None
    username = profile.username

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent init failed: {e}", None

    # Link to this Space's code, sent along with the answers.
    # NOTE(review): when SPACE_ID is unset the URL contains the text "None".
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        qs = resp.json()
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    # A JSON body that is not a list (e.g. an error object) would otherwise
    # blow up in the loop below, outside of any error handling.
    if not isinstance(qs, list):
        return (
            "Failed to fetch questions: expected a list of questions, "
            f"got {type(qs).__name__}",
            None,
        )

    logs, payload = [], []
    for item in qs:
        if not isinstance(item, dict):
            # Skip malformed entries instead of crashing on item.get().
            continue
        tid = item.get("task_id")
        q = item.get("question")
        if not tid or q is None:
            continue
        ans = agent(q)
        payload.append({"task_id": tid, "submitted_answer": ans})
        logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})

    results_df = pd.DataFrame(logs)
    if not payload:
        return "No answers generated.", results_df

    submit = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        r2 = requests.post(submit_url, json=submit, timeout=60)
        r2.raise_for_status()
        data = r2.json()
        status = (
            f"✅ Submission Successful!\n"
            f"Score: {data.get('score','N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
            f"{data.get('message','')}"
        )
        return status, results_df
    except Exception as e:
        # Keep the answers visible even when the submission itself failed.
        return f"Submit failed: {e}", results_df

# --- Gradio UI ---
# Page definition: a heading, a login button, the run button, a
# non-interactive status textbox and a table for the per-question results.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
    results = gr.DataFrame(label="Questions & Submitted Answers", wrap=True)

    # The two values returned by run_and_submit_all fill `status` and
    # `results`, in that order. No `inputs` are wired here although the
    # handler takes a `profile` argument - presumably Gradio supplies it from
    # the login session based on the type annotation; confirm against the
    # Gradio OAuth documentation.
    run_btn.click(run_and_submit_all, outputs=[status, results])

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(debug=True, share=False)