File size: 6,679 Bytes
364f05f
f85ab70
10e9b7d
 
eccf8e4
3c4371f
830c198
85d8289
e80aab9
3db6293
ec8845c
364f05f
 
 
2ac3a83
 
364f05f
 
 
31243f4
364f05f
2ac3a83
364f05f
 
 
 
 
2ac3a83
 
364f05f
 
 
 
95afeec
364f05f
 
 
2ac3a83
364f05f
 
2ac3a83
364f05f
2ac3a83
364f05f
 
 
4021bf3
b795696
 
364f05f
 
 
2ac3a83
830c198
7d65c66
364f05f
2ac3a83
830c198
364f05f
2ac3a83
 
364f05f
 
 
7e4a06b
31243f4
 
364f05f
 
31243f4
364f05f
 
31243f4
364f05f
 
 
f85ab70
364f05f
36ed51a
830c198
364f05f
 
830c198
eccf8e4
2ac3a83
7d65c66
31243f4
2ac3a83
830c198
2ac3a83
830c198
f85ab70
830c198
85d8289
364f05f
 
7d65c66
 
364f05f
f85ab70
31243f4
 
 
 
 
364f05f
7d65c66
b795696
ec8845c
b795696
 
 
 
 
ec8845c
b795696
 
31243f4
364f05f
 
 
b795696
 
 
 
364f05f
b795696
 
364f05f
31243f4
364f05f
 
 
 
b795696
 
 
 
 
830c198
e80aab9
2ac3a83
e80aab9
 
31243f4
e80aab9
 
3c4371f
 
364f05f
e80aab9
830c198
85d8289
7d65c66
830c198
 
 
85d8289
 
 
e80aab9
 
8a4a946
e80aab9
364f05f
 
7e4a06b
31243f4
b795696
 
 
7d65c66
2ac3a83
 
 
 
 
e80aab9
 
830c198
f85ab70
830c198
364f05f
b795696
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# app.py (Minimal "Dummy" Version for Step 1)

import os
import gradio as gr
import requests
import pandas as pd
import logging

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# -----------------------------------------------------------------
# --- AGENT LOGIC IS COMPLETELY REMOVED OR COMMENTED OUT ---
# -----------------------------------------------------------------


# We are replacing the entire GaiaAgent with this simple class.
# It has no external dependencies.
class DummyAgent:
    """Placeholder agent that replies with the same constant string every time.

    It loads no models or tools; construction and each call only write
    log records.
    """

    def __init__(self):
        """Log that the placeholder agent has been created."""
        logging.info("DummyAgent initialized. No models or tools loaded.")

    def __call__(self, question: str) -> str:
        """Return the constant answer for ``question``.

        Only the first 50 characters of ``question`` are written to the log.
        """
        preview = question[:50]
        logging.info(f"DummyAgent received question: {preview}...")
        reply = "dummy_answer"
        logging.info(f"DummyAgent returning fixed answer: {reply}")
        return reply


# All other imports are removed for this test, for example:
# from langchain_community.llms import HuggingFaceHub
# from langchain_community.tools import DuckDuckGoSearchRun
# ... and so on ...

# All tool definitions are removed.
# @tool
# def web_search...

# The AgentState class is removed.
# class AgentState...

# The GaiaAgent class is replaced by DummyAgent above.

# -----------------------------------------------------------------
# --- GRADIO APP AND SUBMISSION LOGIC (largely unchanged) ---
# -----------------------------------------------------------------


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the DUMMY agent, and submits the answers.

    Args:
        profile: Hugging Face OAuth profile of the logged-in user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is None on every
        exit taken before the agent loop runs, and a ``pandas.DataFrame``
        with one row per processed question afterwards. Failures while
        creating the agent, fetching questions, or submitting are reported
        through ``status_message`` instead of being raised.
    """
    if not profile:
        logging.warning("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    logging.info(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        logging.error("SPACE_ID environment variable is not set.")
        return "CRITICAL ERROR: SPACE_ID environment variable is not set.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent (Using the DummyAgent for this test)
    try:
        # We instantiate our simple, harmless agent.
        agent = DummyAgent()
    except Exception as e:
        logging.critical(
            f"Fatal error instantiating even the DummyAgent: {e}", exc_info=True
        )
        return f"Fatal error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    logging.info(f"Agent code URL: {agent_code}")

    # 2. Fetch Questions
    logging.info(f"Fetching questions from: {questions_url}")
    try:
        # Only the network call and JSON decoding live inside the try block.
        response = requests.get(questions_url, timeout=20)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        logging.error(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    if not questions_data:
        logging.warning("Fetched questions list is empty.")
        return "Fetched questions list is empty.", None
    if not isinstance(questions_data, list):
        # A JSON object or scalar here would otherwise crash the loop below
        # with an uncaught AttributeError on ``item.get``.
        payload_type = type(questions_data).__name__
        logging.error(
            f"Unexpected questions payload: expected a list, got {payload_type}."
        )
        return (
            f"Error fetching questions: expected a list, got {payload_type}.",
            None,
        )
    logging.info(f"Fetched {len(questions_data)} questions.")

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    logging.info(f"Running dummy agent on {len(questions_data)} questions...")
    for item in questions_data:
        if not isinstance(item, dict):
            logging.warning(f"Skipping malformed question entry: {item!r}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            # Entries without an id or question text cannot be answered or
            # submitted; record the skip instead of dropping it silently.
            logging.warning(
                f"Skipping question with missing task_id or question: {item!r}"
            )
            continue
        try:
            # The agent call is now super fast and simple.
            submitted_answer = agent(question_text)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": submitted_answer}
            )
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": submitted_answer,
                }
            )
        except Exception as e:
            # A failing question is shown in the results table but is not
            # added to the submission payload.
            logging.error(
                f"Error running dummy agent on task {task_id}: {e}", exc_info=True
            )
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": f"DUMMY AGENT ERROR: {e}",
                }
            )

    if not answers_payload:
        logging.warning("Dummy agent did not produce any answers.")
        return "Dummy agent did not produce any answers.", pd.DataFrame(results_log)

    # 4. Prepare and Submit
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    logging.info(f"Submitting {len(answers_payload)} answers for user '{username}'...")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'This was a test with a dummy agent.')}"
        )
        logging.info("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        logging.critical(
            f"An unexpected error occurred during submission: {e}", exc_info=True
        )
        return f"An unexpected error occurred during submission: {e}", pd.DataFrame(
            results_log
        )


# --- Build Gradio Interface (Unchanged) ---
# Create the Blocks container first, then declare its components inside it.
demo = gr.Blocks()
with demo:
    gr.Markdown("# GAIA Agent Evaluation Runner (Minimal Test)")
    gr.Markdown("This is a minimal version to test the basic app stability.")
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only text area for the status string returned by the handler.
    status_output = gr.Textbox(
        interactive=False,
        lines=5,
        label="Run Status / Submission Result",
    )
    # Table for the per-question rows returned by the handler.
    results_table = gr.DataFrame(wrap=True, label="Questions and Agent Answers")

    # The handler's two return values fill the two outputs, in order.
    run_button.click(
        api_name="run_evaluation",
        outputs=[status_output, results_table],
        fn=run_and_submit_all,
    )

if __name__ == "__main__":
    # Timestamped INFO-level logging, set up only when run as a script.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    logging.info("App Starting (Minimal Version)...")
    demo.launch(share=False, debug=True)