File size: 4,600 Bytes
a39e119
 
574b6ca
 
 
7963312
574b6ca
7963312
a39e119
 
757ebd9
e80aab9
3db6293
e80aab9
a39e119
 
7963312
a39e119
 
 
 
 
 
 
 
 
7963312
a39e119
 
7963312
a39e119
 
 
7963312
a39e119
 
7963312
 
 
a39e119
7963312
 
 
8f6825e
7963312
a39e119
7963312
 
 
a39e119
3c4371f
a39e119
 
8f6825e
31243f4
a39e119
31243f4
7963312
 
757ebd9
36ed51a
7963312
3c4371f
a39e119
eccf8e4
a39e119
 
 
7963312
a39e119
7963312
a39e119
7963312
 
e80aab9
a39e119
7d65c66
 
a39e119
31243f4
8f6825e
7963312
31243f4
 
a39e119
 
 
31243f4
a39e119
 
31243f4
7963312
a39e119
7963312
a39e119
 
e80aab9
a39e119
 
 
7963312
 
a39e119
 
 
 
7963312
a39e119
7963312
a39e119
7963312
a39e119
7963312
a39e119
 
 
 
 
7963312
a39e119
 
 
7963312
a39e119
e80aab9
 
a39e119
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# app.py

import os
import gradio as gr
import requests
import inspect
import pandas as pd

# SmolAgents imports
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Enhanced Agent Definition ---
class BasicAgent:
    """Callable question-answering agent built on a smolagents ``CodeAgent``.

    The wrapped agent uses an ``InferenceClientModel`` created with default
    arguments and a single ``DuckDuckGoSearchTool``.
    """

    def __init__(self):
        """Create the search tool, the model, and the ``CodeAgent`` using them."""
        print("BasicAgent initialized with real agentic capabilities.")

        # Initialize tools and model
        self.search_tool = DuckDuckGoSearchTool()
        self.model = InferenceClientModel()
        self.agent = CodeAgent(
            model=self.model,
            tools=[self.search_tool]
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question text passed to ``self.agent.run``.

        Returns:
            The agent's result converted with ``str``. If the run (or the
            conversion) raises, the string ``"Error in agent: <exception>"``
            is returned instead of propagating the exception.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            response = self.agent.run(question)
            # The run result is not necessarily a str (a final answer can be a
            # number, for example). Slicing it directly would raise and turn a
            # valid answer into an error message, so normalise to text first.
            answer = str(response)
            print(f"Agent response (first 50 chars): {answer[:50]}...")
            return answer
        except Exception as e:
            print(f"Agent error during run: {e}")
            return f"Error in agent: {e}"

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Run the full evaluation cycle for the logged-in user.

    Downloads the question list, answers each question with a BasicAgent,
    posts the collected answers to the scoring endpoint, and returns a
    ``(status message, results table)`` pair. The table is ``None`` when the
    run stops before any question has been processed.
    """
    space_id = os.getenv("SPACE_ID")

    # Guard clause: nothing can be submitted without a logged-in user.
    if not profile:
        print("User not logged in.")
        return "Please login to Hugging Face to submit answers.", None
    username = profile.username
    print(f"User logged in: {username}")

    questions_endpoint = f"{DEFAULT_API_URL}/questions"
    submit_endpoint = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as init_error:
        print(f"Error instantiating agent: {init_error}")
        return f"Error initializing agent: {init_error}", None

    # Link to this Space's code, sent along with the answers.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # Download the questions.
    try:
        questions_response = requests.get(questions_endpoint, timeout=15)
        questions_response.raise_for_status()
        questions_data = questions_response.json()
        if not questions_data:
            return "Empty or invalid question list.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as fetch_error:
        print(f"Error fetching questions: {fetch_error}")
        return f"Error fetching questions: {fetch_error}", None

    # Answer every well-formed question; entries lacking an id or text are skipped.
    results_log = []
    answers_payload = []
    for entry in questions_data:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if task_id and question_text is not None:
            try:
                answer = agent(question_text)
                answers_payload.append(
                    {"task_id": task_id, "submitted_answer": answer}
                )
                results_log.append(
                    {
                        "Task ID": task_id,
                        "Question": question_text,
                        "Submitted Answer": answer,
                    }
                )
            except Exception as task_error:
                # Failed tasks are shown in the table but not submitted.
                print(f"Error on task {task_id}: {task_error}")
                results_log.append(
                    {
                        "Task ID": task_id,
                        "Question": question_text,
                        "Submitted Answer": f"ERROR: {task_error}",
                    }
                )

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    # Submit the answers and summarise the scoring response.
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        submit_response = requests.post(submit_endpoint, json=payload, timeout=60)
        submit_response.raise_for_status()
        result_json = submit_response.json()
        status_lines = [
            "Submission Successful!",
            f"User: {result_json.get('username')}",
            f"Score: {result_json.get('score', 'N/A')}% "
            f"({result_json.get('correct_count', '?')}/{result_json.get('total_attempted', '?')} correct)",
            f"Message: {result_json.get('message', '')}",
        ]
        return "\n".join(status_lines), pd.DataFrame(results_log)
    except Exception as submit_error:
        return f"Submission failed: {submit_error}", pd.DataFrame(results_log)

# --- Gradio UI ---
# Assemble the page inside the `demo` Blocks container: a heading, short
# instructions, a login button, the run button, and two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown("""
    Modify `BasicAgent` to add more tools or logic.
    Log in, click **Run Evaluation & Submit All Answers**, and watch it process automatically.
    """)
    # Login control; `run_and_submit_all` returns early when it gets no profile.
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only text area that receives the first value returned by the handler.
    status = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
    # Table that receives the second returned value (a DataFrame or None).
    results = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs` are listed here. NOTE(review): the handler's `profile`
    # argument is presumably supplied by Gradio based on its
    # `gr.OAuthProfile` annotation — confirm against the Gradio OAuth docs.
    run_btn.click(fn=run_and_submit_all, outputs=[status, results])

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    print("Launching app...")
    demo.launch(debug=True, share=False)