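"""Gradio Space app: runs a smolagents CodeAgent over the GAIA (Agents Course Unit 4) questions and submits its answers to the scoring API."""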
import os
import gradio as gr
import requests
import pandas as pd
import asyncio
import json
from huggingface_hub import login
from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
SUBMIT_URL = f"{DEFAULT_API_URL}/submit"
# --- Hugging Face Login ---
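# Authenticate with the Hugging Face Hub; os.environ[...] raises KeyError if the secret is not set.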
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
# --- Define Tools ---
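# The agent gets a single tool: DuckDuckGo web search.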
search_tool = DuckDuckGoSearchTool()
# --- Main Async Function ---
async def run_and_submit_all(profile: gr.OAuthProfile | None):
    # Initialize Agent
    try:
        agent = CodeAgent(
            tools=[search_tool],
            model=InferenceClientModel(model_id="mistralai/Magistral-Small-2506"),
            max_steps=5,
            verbosity_level=2,
        )
    except Exception as e:
        return f"❌ Agent Initialization Error: {e}", None
    space_id = os.getenv("SPACE_ID", "unknown")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    # Fetch questions
    try:
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions = response.json()
        if not questions:
            return "⚠️ No questions received.", None
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None
    answers = []
    logs = []
    for item in questions:
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        system_prompt = (
            "You are a general AI assistant. I will ask you a question. "
            "Report your thoughts, and finish your answer with the following template: "
            "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible "
            "OR a comma separated list of numbers and/or strings.\n\n"
        )
        full_prompt = system_prompt + f"Question: {question.strip()}"
        try:
            # The agent is synchronous, so run it in the default executor to keep the event loop responsive.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, lambda: agent.run(full_prompt))
            # Normalise the result into a single answer string.
            if isinstance(result, dict) and "final_answer" in result:
                final_answer = str(result["final_answer"]).strip()
            elif isinstance(result, str):
                if "FINAL ANSWER:" in result:
                    final_answer = result.split("FINAL ANSWER:")[-1].strip()
                else:
                    final_answer = result.strip()
            else:
                final_answer = str(result).strip()
        except Exception as e:
            print(f"[ERROR] Task {task_id} failed: {e}")
            final_answer = f"AGENT ERROR: {e}"
        answers.append({"task_id": task_id, "model_answer": final_answer})
        logs.append({"Task ID": task_id, "Question": question, "Submitted Answer": final_answer})
    valid_answers = [a for a in answers if isinstance(a["task_id"], str) and isinstance(a["model_answer"], str)]
    if not valid_answers:
        return "❌ Agent produced no valid answers.", pd.DataFrame(logs)
    submission = {
        "username": profile.username if profile else "unknown",
        "agent_code": agent_code,
        "answers": valid_answers,
    }
    print("[DEBUG] Submitting:\n", json.dumps(submission, indent=2))
    try:
        resp = requests.post(SUBMIT_URL, json=submission, timeout=60)
        resp.raise_for_status()
        result_data = resp.json()
        summary = (
            f"✅ Submission Successful\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
            f"Message: {result_data.get('message', 'No message.')}"
        )
        return summary, pd.DataFrame(logs)
    except Exception as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(logs)
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Agent Evaluation Interface")
    gr.Markdown("""
    - Log in with your Hugging Face account.
    - Click the button below to run the agent and submit the answers.
    - Wait for the final score to appear.
    """)
    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit")
    status = gr.Textbox(label="Status", lines=6)
    table = gr.DataFrame(label="Answer Log")
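    # No explicit `inputs`: Gradio fills the gr.OAuthProfile parameter automatically from the login session.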
    run_button.click(fn=run_and_submit_all, outputs=[status, table])
# --- Launch ---
if __name__ == "__main__":
print("Launching Agent Space...")
demo.launch(debug=True)