# NOTE(review): removed non-code extraction residue (file-size line, git-blame
# commit hashes, and a dumped line-number index) that was not part of the script.
import os
import gradio as gr
import requests
import pandas as pd
import asyncio
import json
from huggingface_hub import login
from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
# --- Constants ---
# Base URL of the GAIA Unit-4 scoring service; the two derived endpoints are
# used below to fetch the question set and to submit the agent's answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
SUBMIT_URL = f"{DEFAULT_API_URL}/submit"
# --- Hugging Face Login ---
# Authenticates at import time so InferenceClientModel calls are authorized.
# Raises KeyError if HUGGINGFACEHUB_API_TOKEN is not set in the environment.
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
# --- Define Tools ---
# Single web-search tool handed to the CodeAgent below.
search_tool = DuckDuckGoSearchTool()
# --- Main Async Function with Progress Logs ---
async def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every fetched question and submit all answers.

    Async generator used as a Gradio event handler: it yields
    ``(status, answer_table, progress_log)`` tuples repeatedly so the UI
    can display live progress while questions are being solved.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or
            ``None`` when nobody is logged in (username falls back to
            ``"unknown"``).

    Yields:
        Tuples of (final status text or ``None`` for intermediate
        updates, ``pd.DataFrame`` of per-task answers or ``None``,
        cumulative progress-log text).
    """
    log_output = ""

    # --- Instantiate the agent -------------------------------------------
    try:
        agent = CodeAgent(
            tools=[search_tool],
            model=InferenceClientModel(model="mistralai/Magistral-Small-2506"),
            max_steps=5,
            verbosity_level=2,
        )
    except Exception as e:
        yield f"β Agent Initialization Error: {e}", None, log_output
        return

    # Link back to this Space's source, included in the submission payload.
    space_id = os.getenv("SPACE_ID", "unknown")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # --- Fetch the question set ------------------------------------------
    try:
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions = response.json()
        if not questions:
            yield "β οΈ No questions received.", None, log_output
            return
    except Exception as e:
        yield f"β Failed to fetch questions: {e}", None, log_output
        return

    answers = []
    logs = []
    loop = asyncio.get_running_loop()

    for item in questions:
        task_id = item.get("task_id")
        question = item.get("question")
        # Skip malformed entries rather than failing the whole run.
        if not task_id or not question:
            continue

        log_output += f"π Solving Task ID: {task_id}...\n"
        yield None, None, log_output  # Live update

        system_prompt = (
            "You are a general AI assistant. I will ask you a question. "
            "Report your thoughts, and finish your answer with the following template: "
            "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.\n\n"
        )
        full_prompt = system_prompt + f"Question: {question.strip()}"

        try:
            # The agent call blocks, so run it in a worker thread to keep
            # the event loop (and thus the Gradio UI) responsive.  The
            # prompt is bound as a lambda default to avoid the classic
            # late-binding-closure trap on the loop variable.
            result = await loop.run_in_executor(
                None, lambda prompt=full_prompt: agent(prompt)
            )
            # Normalize the agent's result into a plain answer string.
            if isinstance(result, dict) and "final_answer" in result:
                final_answer = str(result["final_answer"]).strip()
            elif isinstance(result, str):
                if "FINAL ANSWER:" in result:
                    final_answer = result.split("FINAL ANSWER:")[-1].strip()
                else:
                    final_answer = result.strip()
            else:
                final_answer = str(result).strip()
        except Exception as e:
            # Record the failure as the answer so the run can continue.
            final_answer = f"AGENT ERROR: {e}"
            print(f"[ERROR] Task {task_id} failed: {e}")

        answers.append({"task_id": task_id, "model_answer": final_answer})
        logs.append({"Task ID": task_id, "Question": question, "Submitted Answer": final_answer})
        # NOTE(review): this literal was split across two lines in the source
        # (a SyntaxError); rejoined into a single f-string.
        log_output += f"β Done: {task_id} β Answer: {final_answer[:60]}\n"
        yield None, None, log_output  # Live update

    # Defensive filter: only submit well-formed (str, str) answer records.
    valid_answers = [
        a for a in answers
        if isinstance(a["task_id"], str) and isinstance(a["model_answer"], str)
    ]
    if not valid_answers:
        yield "β Agent produced no valid answers.", pd.DataFrame(logs), log_output
        return

    submission = {
        "username": profile.username if profile else "unknown",
        "agent_code": agent_code,
        "answers": valid_answers,
    }
    print("[DEBUG] Submitting:\n", json.dumps(submission, indent=2))

    # --- Submit to the scoring endpoint ----------------------------------
    try:
        resp = requests.post(SUBMIT_URL, json=submission, timeout=60)
        resp.raise_for_status()
        result_data = resp.json()
        # NOTE(review): this literal was also split across two lines in the
        # source; rejoined into a single f-string.
        summary = (
            f"β Submission Successful\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
            f"Message: {result_data.get('message', 'No message.')}"
        )
        yield summary, pd.DataFrame(logs), log_output
    except Exception as e:
        yield f"β Submission failed: {e}", pd.DataFrame(logs), log_output
# --- Gradio UI ---
# Builds the Blocks layout: login, a run button, and three output widgets
# wired to the async generator above for streaming progress updates.
# NOTE(review): leading indentation inside the `with` block was lost in the
# source (a SyntaxError); restored here without changing any statement.
with gr.Blocks() as demo:
    gr.Markdown("# π§ GAIA Agent Evaluation Interface")
    gr.Markdown("""
    - Log in with your Hugging Face account.
    - Click the button below to run the agent and submit the answers.
    - Watch the log to see which question is being solved in real-time.
    """)
    gr.LoginButton()
    run_button = gr.Button("π Run Evaluation & Submit")
    status = gr.Textbox(label="Final Status", lines=6)
    table = gr.DataFrame(label="Answer Log")
    progress_log = gr.Textbox(label="Live Progress Log", lines=10, interactive=False)
    # No explicit inputs: Gradio injects the OAuthProfile for the handler's
    # `profile` parameter based on its type annotation.
    run_button.click(fn=run_and_submit_all, outputs=[status, table, progress_log])
# --- Launch ---
# Script entry point: start the Gradio server with debug logging enabled.
# NOTE(review): indentation under the guard was lost in the source; restored.
if __name__ == "__main__":
    print("Launching Agent Space...")
    demo.launch(debug=True)