File size: 4,976 Bytes
10e9b7d
d68986e
6c75f10
10e9b7d
eccf8e4
3c4371f
d68986e
6c75f10
10e9b7d
3db6293
e80aab9
d68986e
 
 
 
 
 
31243f4
 
6c75f10
d68986e
4f87fc6
 
 
d68986e
4021bf3
6c75f10
 
7e4a06b
d68986e
7e4a06b
6c75f10
3c4371f
7e4a06b
31243f4
 
4f87fc6
e80aab9
31243f4
6c75f10
31243f4
 
6c75f10
36ed51a
3c4371f
eccf8e4
31243f4
7d65c66
31243f4
7d65c66
6c75f10
 
 
 
 
 
e80aab9
7d65c66
 
31243f4
6c75f10
 
 
31243f4
 
4f87fc6
6c75f10
 
31243f4
6c75f10
31243f4
 
6c75f10
 
 
 
31243f4
6c75f10
 
 
 
 
e80aab9
 
7d65c66
e80aab9
 
31243f4
6c75f10
e80aab9
6c75f10
 
 
e80aab9
6c75f10
7d65c66
6c75f10
 
 
 
 
 
 
 
 
e80aab9
 
 
31243f4
e80aab9
7e4a06b
e80aab9
6c75f10
 
 
e80aab9
6c75f10
 
e80aab9
6c75f10
 
 
 
e80aab9
 
6c75f10
 
 
 
 
7d65c66
6c75f10
 
 
3c4371f
6c75f10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import asyncio
import argparse
import gradio as gr
import requests
import pandas as pd
from agno.agent import RunResponse
from agent import agent

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


async def _async_answer(answer_text: str) -> str:
    """Await the shared agent on ``answer_text`` and return the response content."""
    run_result: RunResponse = await agent.arun(answer_text)
    return run_result.content


class BasicAgent:
    """Synchronous callable wrapper around the asynchronous agent."""

    def __init__(self):
        """Create the wrapper; it holds no state of its own."""

    def __call__(self, task_id: str, question: str) -> str:
        """Answer one question and return the agent's response content.

        The prompt handed to the agent is ``"<task_id>: <question>"``. Each
        call drives the coroutine to completion with ``asyncio.run``.
        """
        print("[INFO] Answering question: >>>", question)
        prompt = f"{task_id}: {question}"
        return asyncio.run(_async_answer(prompt))


def run_agent(profile: gr.OAuthProfile | None, task_id: str | None = None, submit: bool = True):
    """Fetch the evaluation questions, answer them, and optionally submit.

    Args:
        profile: Logged-in Hugging Face profile. Required only when ``submit``
            is true, because its username goes into the submission payload.
        task_id: When given, only the question whose ``task_id`` matches
            (compared as strings) is answered.
        submit: When false, answers are collected but nothing is posted.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per attempted question, or ``None`` when the run stopped
        before any question was attempted.
    """
    # The username is only used when submitting, so a dry run (such as the
    # ``--task-id`` command-line path, which has no profile) must not be
    # blocked by the login check.
    if submit and not profile:
        return "Please log in to Hugging Face.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent_instance = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    if task_id:
        questions_data = [q for q in questions_data if str(q.get("task_id")) == str(task_id)]
        if not questions_data:
            return f"Task {task_id} not found.", None

    results_log = []
    answers_payload = []
    for item in questions_data:
        tid = item.get("task_id")
        qtext = item.get("question")
        if not tid or qtext is None:
            continue
        try:
            # Pass the item's own id: the ``task_id`` parameter is only a
            # filter and is None when every question is being run.
            submitted_answer = agent_instance(tid, qtext)
        except Exception as e:
            # One failing question must not abort the run; record and move on.
            results_log.append({"Task ID": tid, "Question": qtext, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": tid, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": tid, "Question": qtext, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "No answers produced.", pd.DataFrame(results_log)

    if not submit:
        return "Test mode: nothing submitted.", pd.DataFrame(results_log)

    # ``profile`` is guaranteed to be set here by the login check above.
    space_id = os.getenv("SPACE_ID")
    submission_data = {
        "username": f"{profile.username}".strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)

    final_status = (
        f"Submission Successful\n"
        f"User: {result_data.get('username')}\n"
        f"Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})\n"
        f"Message: {result_data.get('message', '')}"
    )
    return final_status, pd.DataFrame(results_log)


def run_agent_single(profile: gr.OAuthProfile | None, task_id: str):
    """Dry-run handler: answer the selected task without submitting.

    An empty ``task_id`` is passed on as ``None``, which makes ``run_agent``
    process every question.
    """
    selected_task = task_id if task_id else None
    return run_agent(profile, selected_task, submit=False)


def run_agent_all(profile: gr.OAuthProfile | None, task_id: str):
    """Submit handler: answer the questions and post the answers.

    An empty ``task_id`` is passed on as ``None`` so every question is run;
    a non-empty one restricts the run, and the submission, to that task.
    """
    selected_task = task_id if task_id else None
    return run_agent(profile, selected_task, submit=True)


# Gradio UI definition. Components are created in display order inside the
# Blocks context, so the statement order below is also the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")

    # Hugging Face login; run_agent needs a profile to obtain the username.
    gr.LoginButton()

    # Optional filter: when left empty the handlers pass None and all tasks run.
    task_id_input = gr.Textbox(label="Task ID (optional)", placeholder="e.g. 2023060607")
    run_test_button = gr.Button("Test Single Task (no submit)")
    run_all_button = gr.Button("Run & Submit All")

    # Both buttons write to the same status box and results table.
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)

    # Only the task id is listed as an input; the handlers' `profile` argument
    # is presumably injected by Gradio from the gr.OAuthProfile type hint —
    # confirm against the Gradio OAuth documentation.
    run_test_button.click(
        fn=run_agent_single,
        inputs=[task_id_input],
        outputs=[status_output, results_table],
    )

    run_all_button.click(
        fn=run_agent_all,
        inputs=[task_id_input],
        outputs=[status_output, results_table],
    )

    gr.Markdown(
        "Running all tasks may take time. Use the single‑task button to debug quickly."
    )

if __name__ == "__main__":
    # Echo whichever Space environment variables are set, for the startup log.
    for env_name in ("SPACE_HOST", "SPACE_ID"):
        env_value = os.getenv(env_name)
        if env_value:
            print(f"{env_name}: {env_value}")

    cli = argparse.ArgumentParser()
    cli.add_argument("--task-id", help="Run a single task locally without submission")
    # parse_known_args: unrecognised command-line arguments are ignored
    # rather than treated as errors.
    parsed, _unknown = cli.parse_known_args()

    if not parsed.task_id:
        # No task requested: serve the web UI.
        demo.launch(debug=True, share=False)
    else:
        # One-off local run of a single task; never submits.
        status, table = run_agent(profile=None, task_id=parsed.task_id, submit=False)
        print(status)
        if table is not None:
            print(table)